# Initial Setup

In [1]:
"""
Armando Gutiérrez Rojo A01702748
Oscar Eduardo Nieto Espitia A01705090
Miguel Weiping Tang A01610836

"""
import math

SEED = 6
MULTIPLIER = 32
INCREMENT = 3
MODULUS = 80
COUNT = 10

In [2]:
def read_data_from_file(filename):
    data = []
    with open(filename, 'r') as file:
        for line in file:
            data.append(float(line.strip()))
    return data

# Problem 1 LCG

In [3]:
"""
  Problem 1 LCG
"""
def lcg(seed, multiplier, increment, modulus, count):
    """
    Generates a sequence of pseudo-random numbers using the Linear Congruential Generator (LCG) method.

    Args:
    - seed: Initial value (seed) for number generation.
    - multiplier: Multiplier for calculating the next number.
    - increment: Constant value added in each iteration.
    - modulus: Modulus that determines the range of the generated numbers.
    - count: Number of values to generate.

    Returns:
    - numbers: List of generated pseudo-random numbers.
    """
    numbers = []
    x = seed
    for _ in range(count):
        x = (multiplier * x + increment) % modulus
        numbers.append(x / modulus)
    return numbers

# Problem 2 Chi Squared

In [4]:
"""
  Problem 2 Chi Squared Test
"""
def create_intervals(step=0.1, precision=4):
    """
    Create a list of intervals based on a given step size and precision.

    Args:
    - step: The size of each interval.
    - precision: The number of decimal places to round to.

    Returns:
    - intervals: A list of intervals represented as [start, end] pairs.
    """
    intervals = []
    interval_start = 0
    for _ in range(10):
        interval_end = round(interval_start + step, precision)
        intervals.append([interval_start, interval_end])
        interval_start = interval_end
    return intervals

In [5]:
def calculate_frequencies(data, intervals):
    """
    Calculate the frequency of numbers in the data within each interval.

    Args:
    - data: A list of numbers.
    - intervals: A list of intervals represented as [start, end] pairs.

    Returns:
    - frequencies: A list of frequencies corresponding to each interval.
    """
    frequencies = [0] * len(intervals)
    for num in data:
        for i, (start, end) in enumerate(intervals):
            if start <= num < end:
                frequencies[i] += 1
    return frequencies

In [6]:
def calculate_chi_square(frequencies):
    """
    Calculate the chi-squared value for the given frequencies.

    Args:
    - frequencies: A list of observed frequencies.

    Returns:
    - chi_square: The chi-squared value.
    """
    expected = sum(frequencies) / len(frequencies)
    chi_square = sum((observed - expected) ** 2 / expected for observed in frequencies)
    return chi_square

In [7]:
def print_results(intervals, frequencies, chi_square, chi_critical=16.91):
    """
    Print the results of the chi-squared test.

    Args:
    - intervals: A list of intervals.
    - frequencies: A list of observed frequencies.
    - chi_square: The calculated chi-squared value.
    - chi_critical: The critical chi-squared value for the hypothesis test.

    Returns:
    - None
    """
    print("Chi-squared test")
    print("Intervals\tObserved\tExpected\t(O - E)^2 / E")
    expected = sum(frequencies) / len(frequencies)
    for (start, end), observed in zip(intervals, frequencies):
        o_minus_e_squared = (observed - expected) ** 2 / expected
        print(f"[{start:.4f} - {end:.4f})\t{observed}\t\t{expected:.4f}\t\t{o_minus_e_squared:.4f}")
    print("--------------------Suma:", sum(frequencies), f"---------------------\tχ^2 = {round(chi_square, 4)}")

    # Perform the hypothesis test
    hypothesis_result = "rejected" if chi_square > chi_critical else "not rejected"
    print(f"\nH0: Generated numbers are not different from the uniform distribution")
    print(f"H1: Generated numbers are different from the uniform distribution\n")
    print(f"Since {round(chi_square, 4)} > {chi_critical}, H0 is {hypothesis_result}")

In [8]:
def chi_square_test(data):
    """
    Perform the chi-squared test on the given data.

    Args:
    - data: A list of numbers.

    Returns:
    - None
    """
    intervals = create_intervals()
    frequencies = calculate_frequencies(data, intervals)
    chi_square = calculate_chi_square(frequencies)
    print_results(intervals, frequencies, chi_square)

# Problem 3 Run Test

In [9]:
"""
  Problem 3 Run Test
"""
def calculate_statistics(signs):
    """
    Calculate the statistics for the streaks.

    Args:
    - signs: A list of signs.

    Returns:
    - A tuple containing the mean, standard deviation, and z-score.
    """
    miu = (2 * len(signs) - 1) / 3
    sigma = math.sqrt((16 * len(signs) - 29) / 90)
    streaks = 1
    for i in range(len(signs) - 1):
        if signs[i] != signs[i + 1]:
            streaks += 1
    z_score = (streaks - miu) / sigma
    return miu, sigma, z_score, streaks

In [10]:
def print_results2(signs, miu, sigma, z_score, streaks):
    """
    Print the results of the streaks calculation.

    Args:
    - signs: A list of signs.
    - miu: The mean.
    - sigma: The standard deviation.
    - z_score: The z-score.
    - streaks: The number of streaks.

    Returns:
    - None
    """
    # Print generated signs
    print("Generated signs:")
    print(" ".join(signs))
    print("\ntotal signs:", len(signs))
    print("total runs:", streaks)

    # Print statistics
    print("\nStatistics")
    print(f"Miu = {miu:.4f}")
    print(f"Sigma = {sigma:.5f}")
    print(f"Zscore = {z_score:.6f}\n")

    # Perform and print the hypothesis test
    hypothesis_result = "not rejected" if abs(z_score) < 1.96 else "rejected"
    print("H0: Appearance of the numbers is random")
    print("H1: Appearance of the numbers is not random")
    print(f"Since |{z_score:.6f}| < |1.96|, H0 is {hypothesis_result}")

In [11]:
def get_streaks(data):
    """
    Calculate the streaks in the given data.

    Args:
    - data: A list of numbers.

    Returns:
    - None
    """
    signs = ['+' if data[i] <= data[i + 1] else '-' for i in range(len(data) - 1)]
    miu, sigma, z_score, streaks = calculate_statistics(signs)
    print_results2(signs, miu, sigma, z_score, streaks)

# Problem Outputs and conclusions

In [12]:
"""
Problem Outputs and Conclusions
"""

random_numbers = lcg(SEED, MULTIPLIER, INCREMENT, MODULUS, COUNT)
print(f"Linear congrential method: \n The numbers are: {random_numbers} \n")
chi_square_test(read_data_from_file("chi_data.txt"))
get_streaks(read_data_from_file("runs_data.txt"))

Linear congrential method: 
 The numbers are: [0.4375, 0.0375, 0.2375, 0.6375, 0.4375, 0.0375, 0.2375, 0.6375, 0.4375, 0.0375] 

Chi-squared test
Intervals	Observed	Expected	(O - E)^2 / E
[0.0000 - 0.1000)	0		3.0000		3.0000
[0.1000 - 0.2000)	3		3.0000		0.0000
[0.2000 - 0.3000)	3		3.0000		0.0000
[0.3000 - 0.4000)	4		3.0000		0.3333
[0.4000 - 0.5000)	5		3.0000		1.3333
[0.5000 - 0.6000)	6		3.0000		3.0000
[0.6000 - 0.7000)	0		3.0000		3.0000
[0.7000 - 0.8000)	2		3.0000		0.3333
[0.8000 - 0.9000)	7		3.0000		5.3333
[0.9000 - 1.0000)	0		3.0000		3.0000
--------------------Suma: 30 ---------------------	χ^2 = 19.3333

H0: Generated numbers are not different from the uniform distribution
H1: Generated numbers are different from the uniform distribution

Since 19.3333 > 16.91, H0 is rejected
Generated signs:
+ - - + - + - + + + + + -

total signs: 13
total runs: 8

Statistics
Miu = 8.3333
Sigma = 1.41028
Zscore = -0.236360

H0: Appearance of the numbers is random
H1: Appearance of the numbers is not