This notebook can be used to analyse the number of terms for the sequences given in the OEIS.

In [None]:
# Folder that contains the pickle files of all sequences; every "*.pickle"
# file directly inside it is read by the loading cell of this notebook.
sequences_folder = "all_sequences" # folder which contains pickle files of all sequences

In [None]:
import os
import pickle
from collections import Counter

In [None]:
# Collect, for every sequence stored in the pickle files of `sequences_folder`,
# how many terms it has.
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only run this cell on pickle files that come from a trusted source.
number_of_terms = []     # one entry per sequence: its number of terms
number_of_sequences = 0  # running total of sequences over all files

# os.listdir accepts the str path directly and then yields str names, so no
# fsencode/fsdecode round trip is needed.
for filename in os.listdir(sequences_folder):
    if not filename.endswith(".pickle"):
        continue
    with open(os.path.join(sequences_folder, filename), "rb") as pickle_file:
        sequences = pickle.load(pickle_file)
    # The file is closed as soon as its content is loaded. Only the values of
    # the mapping are needed; the length of element 1 of each value is recorded
    # (presumably the list of terms -- confirm against the code that writes
    # the pickle files).
    number_of_sequences += len(sequences)
    number_of_terms.extend(len(seq[1]) for seq in sequences.values())

# Maps a number of terms to how many sequences have exactly that many terms.
number_of_terms_counter = Counter(number_of_terms)

In [None]:
# Total number of sequences counted while reading the pickle files.
print(number_of_sequences)

In [None]:
# Report, for several thresholds, the share of sequences that have at most
# that many terms. The printed value is the plain ratio (it is not multiplied
# by 100). `numerical_approx` is not imported in this notebook; presumably it
# is the SageMath built-in -- confirm the notebook runs on a Sage kernel.
maximal_number_terms = [5, 10, 20, 50, 99]
for max_terms in maximal_number_terms:
    short_sequences = sum(
        count
        for terms, count in number_of_terms_counter.items()
        if terms <= max_terms
    )
    print(f"Percentage of sequences with maximal {max_terms} terms:")
    print(numerical_approx(short_sequences/number_of_sequences, digits=2))

In [None]:
# Report, for several offsets below 10000, the share of sequences that have at
# least (10000 - offset) terms. The printed value is the plain ratio (it is not
# multiplied by 100).
minimal_number_terms = [0, 50, 100, 1000, 5000, 9000, 9900]
for min_terms in minimal_number_terms:
    num_terms = 10000-min_terms
    print(f"Percentage of sequences with at least {num_terms} terms:")
    # Add up the counts of every term count that reaches the threshold.
    sum_sequences = sum(
        count
        for terms, count in number_of_terms_counter.items()
        if terms >= num_terms
    )
    print(numerical_approx(sum_sequences/number_of_sequences, digits=2))

In [None]:
# Bin edges for the histogram of the number of terms:
#   0, then 21..101 in steps of 20, 201..1001 in steps of 200,
#   2001..10001 in steps of 2000, and a closing edge at the largest
#   number of terms that occurs.
bins = [0] + [i*20+21 for i in range(5)] + [i*200+201 for i in range(0,5)] + [i*2000+2001 for i in range(0,5)]
# np.histogram rejects bin edges that decrease, so the closing edge must not
# fall below the last fixed edge when no sequence has that many terms.
# `default=0` keeps this cell from failing when no sequence was loaded.
bins.append(max(max(number_of_terms_counter, default=0), bins[-1]))
print(bins)

In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Count how many sequences fall into each bin defined by `bins`.
# np.histogram returns a pair (counts, bin_edges); every bin is half-open
# [left, right) except the last one, which also includes its right edge.
hist = np.histogram(number_of_terms, bins=bins)
print(hist)

In [None]:
# Bar chart of the histogram in `hist`, saved as 'numberOfTerms.png'.
# rcParams are read when the figure and its texts are created, so they have to
# be set before plt.bar implicitly creates the figure; setting them afterwards
# leaves the first rendering of this plot with the default size and font.
plt.rcParams["figure.figsize"] = (25, 5)
plt.rcParams.update({'font.size': 15})

counts, edges = hist
# Every bin except the last is labelled with the largest value it contains
# (its right edge minus one); the last bin holds everything above that.
labels = [f"$\\leq${edge-1}" for edge in edges[1:-1]]+[f"$>$ {edges[-2]-1}"]
plt.bar(labels, counts, color=plt.cm.tab20(1))
plt.savefig('numberOfTerms.png')
plt.show()