# The Palindrome: 1111111111 is a random sequence
This is the *Jupyter Notebook* referenced in the article: [1111111111 is a random sequence](https://thepalindrome.substack.com/p/1111111111-is-a-random-sequence) by **The Palindrome**.

In [None]:
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import patheffects
from itertools import groupby
from collections import defaultdict

In [None]:
def random_sequence(size):
    """Draw a random 0-1 array of the given size.

    `size` is forwarded unchanged to `np.random.choice`, which samples each
    entry from the two symbols 0 and 1.
    """
    symbols = [0, 1]
    return np.random.choice(symbols, size=size)


def longest_run(X):
    """Return the length of the longest run of equal consecutive items in X.

    X may be any iterable (list, string, NumPy array, ...). Consecutive equal
    items are grouped with `itertools.groupby` and the size of the largest
    group is returned. An empty iterable has no runs, so 0 is returned
    instead of letting `max()` raise a ValueError.
    """
    # source: https://stackoverflow.com/questions/22214086/python-a-program-to-find-the-length-of-the-longest-run-in-a-given-list
    return max((sum(1 for _ in run) for _, run in groupby(X)), default=0)

# Longest runs for 0-1 sequences of 10 characters

In [None]:
# Monte Carlo estimate of the longest-run distribution for random 0-1
# sequences of length 10.
n_sample = 1000000
runs_10 = [longest_run(random_sequence(10)) for _ in range(n_sample)]

# counts_10[k-1] is the number of samples whose longest run is exactly k.
# np.bincount keeps a zero entry for run lengths that never occurred (np.unique
# would silently drop them and shift every later position), and minlength=11
# guarantees entries for all ten possible lengths 1..10. Bin 0 is discarded.
counts_10 = np.bincount(runs_10, minlength=11)[1:]
pmf_10 = counts_10/n_sample

In [None]:
# Bar chart of pmf_10, drawn inside the xkcd plotting context.
with plt.xkcd():
    fig = plt.figure(figsize=(18, 12))
    background = "#111111"

    # Styling: replace the path effects with a zero-width stroke and apply the
    # dark style before the axes are created.
    plt.rcParams['path.effects'] = [patheffects.withStroke(linewidth=0)]
    plt.style.use('dark_background')
    fig.set_facecolor(background)
    ax = fig.add_subplot(111)
    ax.set_facecolor(background)

    # Content: the i-th entry of pmf_10 is drawn at x = i + 1.
    bar_positions = range(1, len(pmf_10) + 1)
    ax.bar(bar_positions, pmf_10)
    ax.set_title("The distribution of longest runs in a 0-1 sequence of 10 characters", fontsize="24")
    ax.set_xlabel("longest run", fontsize="20")
    ax.set_ylabel("probability", fontsize="20")
    plt.xticks(fontsize=16)
    plt.yticks(fontsize=16)

## The expected value of the longest run in random 0-1 sequences of length 10

In [None]:
# Expected value as a weighted sum: each k in 1..10 times the matching pmf_10 entry.
lengths_1_to_10 = np.arange(1, 11)
ev_10 = (lengths_1_to_10*pmf_10).sum()

In [None]:
# Show the estimated expected value as the cell output.
ev_10

## Probability of a run of length at least *k* in random 0-1 sequences of length 10

In [None]:
# tail_10[i] is the sum of pmf_10 from position i to the end, for i = 0..9.
tail_10 = []
for start in range(10):
    tail_10.append(sum(pmf_10[start:]))

In [None]:
# Show the list of tail sums as the cell output.
tail_10

In [None]:
# Bar chart of tail_10, drawn inside the xkcd plotting context.
with plt.xkcd():
    fig = plt.figure(figsize=(18, 12))
    background = "#111111"

    # Styling: replace the path effects with a zero-width stroke and apply the
    # dark style before the axes are created.
    plt.rcParams['path.effects'] = [patheffects.withStroke(linewidth=0)]
    plt.style.use('dark_background')
    fig.set_facecolor(background)
    ax = fig.add_subplot(111)
    ax.set_facecolor(background)

    # Content: the i-th entry of tail_10 is drawn at x = i + 1.
    bar_positions = range(1, len(tail_10) + 1)
    ax.bar(bar_positions, tail_10)
    ax.set_title("The probability of at least k-long runs", fontsize="24")
    ax.set_xlabel("k", fontsize="20")
    ax.set_ylabel("probability", fontsize="20")
    plt.xticks(fontsize=16)
    plt.yticks(fontsize=16)

# Longest runs for 0-1 sequences of 100 characters

In [None]:
# Monte Carlo estimate of the longest-run distribution for random 0-1
# sequences of length 100.
n_sample = 1000000
runs_100 = [longest_run(random_sequence(100)) for _ in range(n_sample)]

# counts_100[k-1] is the number of samples whose longest run is exactly k, for
# k = 1 .. (largest observed run). Very short longest runs practically never
# occur at this length; np.unique would drop those values and shift the
# remaining counts to the front, so a plot against 1, 2, 3, ... would be
# mislabelled. np.bincount keeps explicit zeros instead. Bin 0 is discarded.
counts_100 = np.bincount(runs_100)[1:]
pmf_100 = counts_100/n_sample

In [None]:
# Bar chart of pmf_100, drawn inside the xkcd plotting context.
with plt.xkcd():
    fig = plt.figure(figsize=(18, 12))
    background = "#111111"

    # Styling: replace the path effects with a zero-width stroke and apply the
    # dark style before the axes are created.
    plt.rcParams['path.effects'] = [patheffects.withStroke(linewidth=0)]
    plt.style.use('dark_background')
    fig.set_facecolor(background)
    ax = fig.add_subplot(111)
    ax.set_facecolor(background)

    # Content: the i-th entry of pmf_100 is drawn at x = i + 1.
    bar_positions = range(1, len(pmf_100) + 1)
    ax.bar(bar_positions, pmf_100)
    ax.set_title("The distribution of longest runs in a 0-1 sequence of 100 characters", fontsize="24")
    ax.set_xlabel("longest run", fontsize="20")
    ax.set_ylabel("probability", fontsize="20")
    plt.xticks(fontsize=16)
    plt.yticks(fontsize=16)

# Longest runs for 0-1 sequences of 1000 characters

In [None]:
# Monte Carlo estimate of the longest-run distribution for random 0-1
# sequences of length 1000.
n_sample = 1000000
runs_1000 = np.array([longest_run(random_sequence(1000)) for _ in range(n_sample)])

# counts_1000[k-1] is the number of samples whose longest run is exactly k, for
# k = 1 .. (largest observed run). Short longest runs practically never occur
# at this length; np.unique would drop those values and shift the remaining
# counts to the front, so a plot against 1, 2, 3, ... would be mislabelled.
# np.bincount keeps explicit zeros instead. Bin 0 is discarded.
counts_1000 = np.bincount(runs_1000)[1:]
pmf_1000 = counts_1000/n_sample

In [None]:
# Bar chart of pmf_1000, drawn inside the xkcd plotting context.
with plt.xkcd():
    fig = plt.figure(figsize=(18, 12))
    background = "#111111"

    # Styling: replace the path effects with a zero-width stroke and apply the
    # dark style before the axes are created.
    plt.rcParams['path.effects'] = [patheffects.withStroke(linewidth=0)]
    plt.style.use('dark_background')
    fig.set_facecolor(background)
    ax = fig.add_subplot(111)
    ax.set_facecolor(background)

    # Content: the i-th entry of pmf_1000 is drawn at x = i + 1.
    bar_positions = range(1, len(pmf_1000) + 1)
    ax.bar(bar_positions, pmf_1000)
    ax.set_title("The distribution of longest runs in a 0-1 sequence of 1000 characters", fontsize="24")
    ax.set_xlabel("longest run", fontsize="20")
    ax.set_ylabel("probability", fontsize="20")
    plt.xticks(fontsize=16)
    plt.yticks(fontsize=16)

# Twitter data

In [None]:
# Human-generated 0-1 sequences, stored as strings of '0'/'1' characters
# (presumably collected from Twitter replies, per the section heading; each
# entry is expected to be 10 characters long — TODO confirm).
twitter_data = ["0001101101", "1000011001", "1011001110", "0110101010", "1101101010", "1001011110", "1011001110", "1111101111", "0110001011", "0110111100", "0000110001", "0110100010", "1010110101", "0111010010", "0111110000", "0100111000", "0110010000", "1011011100", "1111111111", "1100000001", "0101110110", "0101100111", "1001110011", "0001001010", "1111110111", "1001000001", "0011100011", "1101011111", "0010110111", "1001100010", "0101110011", "1010001000", "0001001011", "1011111111", "1110011001", "0110010110", "1111111011", "1000010111", "0110011010", "1111111101", "0000000010", "0010110101", "1010110100", "0110111100", "0110000111", "0010111000", "1000111110", "0010110011", "0110101111", "1111010011", "0100100010", "1010101010", "0010111011", "0101011101", "1111111111", "0000011111", "0000001000", "1000010001", "1001111010", "0110111010", "1011000101", "0010111001", "0010100010", "0100111111", "1111111111", "0101010111", "1011100101", "0000100001", "1100001010", "0000100111", "0001011110", "0000000000", "0100011000", "1001000111", "0111010110", "0000110011", "1011010101", "0110101001", "1000111011", "0101101010", "0100110101", "0101010110", "0101001001", "0111101000", "1101000101", "1000000001", "1110101011", "1110110011", "1011010001", "1110011010", "1101110100", "1101111000", "1101110100", "0100110111", "0110010110", "1001010110", "0100100010", "1001010111", "1011010110", "1001110001", "0100111001", "0010001000", "1110100111", "0100110000", "1110010000", "0100010110", "1011001010", "1011001110", "1011110011", "0110110010", "1001001010", "0111001001", "0001110001", "0110111001", "1111110000", "1010010011", "0100110010", "0000000100", "1101011110", "1011101001", "0110101001", "0110001001", "0001101110", "0010110101", "1101110001", "1010100000", "0001110100", "1101101001", "0011001000", "0010110111", "1101001011", "1001011010", "0111010101", "1000101011", "0001010101", "0001011001", "0111101110", "0011010010", "0101110110", "1110010110", "0100011100", 
"1001101010", "1001100011", "0100101111", "1011000111", "1011010110", "1010010101", "0110110111", "1010100111", "0010111011", "1100010010", "1010000110", "1101011100", "1001010111", "1101011101", "1011010101", "1001000111", "0110111000", "1010001001", "1101011110", "0110111011", "1110110100", "1100000101", "0010001101", "1011110100", "1001001010", "1110001010", "0111001001", "1001100101", "1111111111", "1000001100", "0010101001", "0111000101", "1011111111", "0000011001", "1010100001", "1001110101", "1101110100", "0101110100", "1001111011", "0000000001", "1000010110", "1101110110", "1011000110", "1111111111", "1110011011", "1100001101", "0100101101", "1110101001", "0101001011", "0000000000", "1000101101", "1010101011", "1000101101", "0111001001", "0100110111", "1011001000", "1011010101"]

In [None]:
# Longest run of each human-generated sequence.
runs_twitter = [longest_run(seq) for seq in twitter_data]

# counts_twitter[k-1] is the number of sequences whose longest run is exactly k.
# With a small sample some run lengths may not occur at all; np.bincount keeps
# a zero for them (np.unique would drop them and shift later positions), and
# minlength=11 guarantees entries for all lengths 1..10 so the result lines up
# element-wise with the simulated 10-character distribution. Bin 0 is discarded.
counts_twitter = np.bincount(runs_twitter, minlength=11)[1:]
pmf_twitter = counts_twitter/sum(counts_twitter)

In [None]:
# Bar chart of pmf_twitter, drawn inside the xkcd plotting context.
with plt.xkcd():
    fig = plt.figure(figsize=(18, 12))
    background = "#111111"

    # Styling: replace the path effects with a zero-width stroke and apply the
    # dark style before the axes are created.
    plt.rcParams['path.effects'] = [patheffects.withStroke(linewidth=0)]
    plt.style.use('dark_background')
    fig.set_facecolor(background)
    ax = fig.add_subplot(111)
    ax.set_facecolor(background)

    # Content: the i-th entry of pmf_twitter is drawn at x = i + 1.
    bar_positions = range(1, len(pmf_twitter) + 1)
    ax.bar(bar_positions, pmf_twitter)
    ax.set_title("The distribution of longest runs in human-generated 0-1 sequences of 10 characters", fontsize="24")
    ax.set_xlabel("longest run", fontsize="20")
    ax.set_ylabel("probability", fontsize="20")
    plt.xticks(fontsize=16)
    plt.yticks(fontsize=16)

## The expected value of the longest run in human-generated 0-1 sequences of length 10

In [None]:
# Expected value as a weighted sum: each k in 1..10 times the matching pmf_twitter entry.
lengths_1_to_10 = np.arange(1, 11)
ev_twitter = (lengths_1_to_10*pmf_twitter).sum()

In [None]:
# Show the expected value for the human-generated data as the cell output.
ev_twitter

## Plotting the probability distribution of simulated vs human-generated runs

In [None]:
# Long-format table with one row per sequence: "x" holds the longest run and
# "label" says which data set the row came from. The human-generated rows come
# first, followed by the simulated ones, with a fresh 0..n-1 index.
twitter_labels = ["Manually generated"] * len(runs_twitter)
random_labels = ["True random"] * len(runs_10)

df_twitter = pd.DataFrame({"x": runs_twitter, "label": twitter_labels})
df_10 = pd.DataFrame({"x": runs_10, "label": random_labels})
df = pd.concat((df_twitter, df_10), axis=0, ignore_index=True)

In [None]:
import seaborn as sns


# Overlaid histograms of the longest run for the two groups in df, each
# normalised separately (common_norm=False) so they are comparable as proportions.
with plt.xkcd():
    fig, ax = plt.subplots(figsize=(18, 12))
    background = "#111111"

    # Styling: replace the path effects with a zero-width stroke, apply the
    # dark style and paint both figure and axes backgrounds.
    plt.rcParams['path.effects'] = [patheffects.withStroke(linewidth=0)]
    plt.style.use('dark_background')
    fig.set_facecolor(background)
    ax.set_facecolor(background)

    # Content.
    sns.despine(fig=fig)
    histogram_options = dict(
        x="x",
        hue="label",
        bins=range(11),
        multiple="layer",
        palette="light:m_r",
        edgecolor=".5",
        linewidth=1,
        stat="proportion",
        discrete=True,
        common_norm=False,
    )
    sns.histplot(df, **histogram_options)
    ax.xaxis.set_major_formatter(mpl.ticker.ScalarFormatter())
    ax.set_title("The distribution of longest runs in randomly vs manually generated 0-1 sequences", fontsize=24)
    ax.set_xlabel("longest run", fontsize=20)
    ax.set_ylabel("probability", fontsize=20)
    plt.xticks(fontsize=16)
    plt.yticks(fontsize=16)

## Pearson's chi-squared test

In [None]:
from scipy.stats import chisquare

# Pearson's chi-squared statistic is defined on observed and expected *counts*.
# Feeding it two probability vectors shrinks the statistic by the sample size
# and makes the p-value meaningless, so the simulated probabilities are scaled
# up to the number of human-generated sequences and compared with the raw counts.
# NOTE: some run lengths have very small expected counts here, so the
# chi-squared approximation should be interpreted with caution.
n_twitter = np.sum(counts_twitter)
chisquare(counts_twitter, pmf_10*n_twitter)

# The theorem of Erdős and Rényi

In [None]:
def max_running_sum(seq, window_size):
    """Return the largest sum over any `window_size` consecutive items of `seq`.

    The window sums are obtained by convolving `seq` with an all-ones integer
    kernel of length `window_size` in 'valid' mode; the maximum of those sums
    is returned.
    """
    kernel = np.ones(window_size, dtype=int)
    window_sums = np.convolve(seq, kernel, mode='valid')
    return window_sums.max()

In [None]:
# Simulation settings: the window length is log2 of the sequence length, truncated to int.
n_sample = 100000
seq_len = 32
window_size = int(np.log2(seq_len))

# Maximal window sum of each simulated sequence.
max_running_sum_32 = np.array([
    max_running_sum(random_sequence(seq_len), window_size)
    for _ in range(n_sample)
])

# Tally how often each maximal sum occurred; sums that never occurred read as 0.
observed_sums, observed_counts = np.unique(max_running_sum_32, return_counts=True)
counts_max_running_sum_32 = defaultdict(int)
counts_max_running_sum_32.update(zip(observed_sums, observed_counts))

# Relative frequency of every possible sum 0..window_size.
pmf_max_running_sum_32 = [
    counts_max_running_sum_32[total]/n_sample
    for total in range(window_size + 1)
]

In [None]:
# Same simulation for sequences of length 128; n_sample is reused from the
# notebook state. The window length is log2 of the sequence length, truncated to int.
seq_len = 128
window_size = int(np.log2(seq_len))

# Maximal window sum of each simulated sequence.
max_running_sum_128 = np.array([
    max_running_sum(random_sequence(seq_len), window_size)
    for _ in range(n_sample)
])

# Tally how often each maximal sum occurred; sums that never occurred read as 0.
observed_sums, observed_counts = np.unique(max_running_sum_128, return_counts=True)
counts_max_running_sum_128 = defaultdict(int)
counts_max_running_sum_128.update(zip(observed_sums, observed_counts))

# Relative frequency of every possible sum 0..window_size.
pmf_max_running_sum_128 = [
    counts_max_running_sum_128[total]/n_sample
    for total in range(window_size + 1)
]

In [None]:
# Same simulation for sequences of length 1024; n_sample is reused from the
# notebook state. The window length is log2 of the sequence length, truncated to int.
seq_len = 1024
window_size = int(np.log2(seq_len))

# Maximal window sum of each simulated sequence.
max_running_sum_1024 = np.array([
    max_running_sum(random_sequence(seq_len), window_size)
    for _ in range(n_sample)
])

# Tally how often each maximal sum occurred; sums that never occurred read as 0.
observed_sums, observed_counts = np.unique(max_running_sum_1024, return_counts=True)
counts_max_running_sum_1024 = defaultdict(int)
counts_max_running_sum_1024.update(zip(observed_sums, observed_counts))

# Relative frequency of every possible sum 0..window_size.
pmf_max_running_sum_1024 = [
    counts_max_running_sum_1024[total]/n_sample
    for total in range(window_size + 1)
]

In [None]:
# Same simulation for sequences of length 8192; n_sample is reused from the
# notebook state. The window length is log2 of the sequence length, truncated to int.
seq_len = 8192
window_size = int(np.log2(seq_len))

# Maximal window sum of each simulated sequence.
max_running_sum_8192 = np.array([
    max_running_sum(random_sequence(seq_len), window_size)
    for _ in range(n_sample)
])

# Tally how often each maximal sum occurred; sums that never occurred read as 0.
observed_sums, observed_counts = np.unique(max_running_sum_8192, return_counts=True)
counts_max_running_sum_8192 = defaultdict(int)
counts_max_running_sum_8192.update(zip(observed_sums, observed_counts))

# Relative frequency of every possible sum 0..window_size.
pmf_max_running_sum_8192 = [
    counts_max_running_sum_8192[total]/n_sample
    for total in range(window_size + 1)
]

In [None]:
# Bar chart of pmf_max_running_sum_128, drawn inside the xkcd plotting context.
with plt.xkcd():
    fig = plt.figure(figsize=(8, 8))
    background = "#111111"

    # Styling: replace the path effects with a zero-width stroke and apply the
    # dark style before the axes are created.
    plt.rcParams['path.effects'] = [patheffects.withStroke(linewidth=0)]
    plt.style.use('dark_background')
    fig.set_facecolor(background)
    ax = fig.add_subplot(111)
    ax.set_facecolor(background)

    # Content: the i-th entry of the pmf is drawn at x = i.
    bar_positions = range(len(pmf_max_running_sum_128))
    ax.bar(bar_positions, pmf_max_running_sum_128)
    ax.set_title("length = 2^7")
    ax.set_xlabel("Maximal running sum in a 7-long window", fontsize="20")
    ax.set_ylabel("probability", fontsize="20")
    plt.xticks(fontsize=16)
    plt.yticks(fontsize=16)