In [53]:
import csv
from collections import Counter
import math

# Open the file and read data
with open('price.csv', 'r') as f:
    data = f.read()

# Split the data into rows and clean up empty rows
data = [row.split(',') for row in data.strip().split('\n')]

# Extract headers and data separately
headers = data[0]  # First row as headers
data = data[1:]    # Remaining rows as data

def categorize_variables(headers, data):
    num_vars, cat_vars = [], []
    for i, h in enumerate(headers):
        try:
            # Attempt to convert all values in column to float
            [float(row[i]) for row in data]
            num_vars.append(h)
        except ValueError:
            # If any value cannot be converted, it's categorical
            cat_vars.append(h)
    return num_vars, cat_vars

def contingency_table(data, cat_vars):
    tables = {}
    for var in cat_vars:
        idx = headers.index(var)  # Get index of the variable
        counts = {}
        for row in data:
            key = row[idx]
            counts[key] = counts.get(key, 0) + 1
        tables[var] = counts
    return tables

def stats_numeric(data, num_vars):
    stats = {}
    for var in num_vars:
        idx = headers.index(var)  # Get index of the variable
        vals = [float(row[idx]) for row in data]  # Extract values
        n = len(vals)
        mean = sum(vals) / n
        variance = sum((x - mean) ** 2 for x in vals) / n
        std_dev = math.sqrt(variance)
        mode = Counter(vals).most_common(1)[0][0]  # Most common value
        
        stats[var] = {
            'mean': mean,
            'min': min(vals),
            'max': max(vals),
            'median': vals[n // 2] if n % 2 else (vals[n // 2 - 1] + vals[n // 2]) / 2,
            'mode': mode,
            'variance': variance,
            'std_dev': std_dev
        }
    return stats

# Categorize variables
num_vars, cat_vars = categorize_variables(headers, data)

# Print results
print("Numerical Variables:", num_vars)
print("Categorical Variables:", cat_vars)
print("Contingency Tables:", contingency_table(data, cat_vars))
print("Numerical Stats:", stats_numeric(data, num_vars))


Numerical Variables: ['Open', 'High', 'Low']
Categorical Variables: ['Date', 'Close', 'Volume']
Contingency Tables: {'Date': {'1/3/2012': 1, '1/4/2012': 1, '1/5/2012': 1, '1/6/2012': 1, '1/9/2012': 1, '1/10/2012': 1, '1/11/2012': 1, '1/12/2012': 1, '1/13/2012': 1, '1/17/2012': 1, '1/18/2012': 1, '1/19/2012': 1, '1/20/2012': 1, '1/23/2012': 1, '1/24/2012': 1, '1/25/2012': 1, '1/26/2012': 1, '1/27/2012': 1, '1/30/2012': 1, '1/31/2012': 1, '2/1/2012': 1, '2/2/2012': 1, '2/3/2012': 1, '2/6/2012': 1, '2/7/2012': 1, '2/8/2012': 1, '2/9/2012': 1, '2/10/2012': 1, '2/13/2012': 1, '2/14/2012': 1, '2/15/2012': 1, '2/16/2012': 1, '2/17/2012': 1, '2/21/2012': 1, '2/22/2012': 1, '2/23/2012': 1, '2/24/2012': 1, '2/27/2012': 1, '2/28/2012': 1, '2/29/2012': 1, '3/1/2012': 1, '3/2/2012': 1, '3/5/2012': 1, '3/6/2012': 1, '3/7/2012': 1, '3/8/2012': 1, '3/9/2012': 1, '3/12/2012': 1, '3/13/2012': 1, '3/14/2012': 1, '3/15/2012': 1, '3/16/2012': 1, '3/19/2012': 1, '3/20/2012': 1, '3/21/2012': 1, '3/22/2012': 

In [54]:
import csv
from collections import Counter
import math

# Open the file and read data
with open('price.csv', 'r') as f:
    data = f.read()

# Split the data into rows and clean up empty rows
data = [row.split(',') for row in data.strip().split('\n')]

# Extract headers and data separately
headers = data[0]  # First row as headers
data = data[1:]    # Remaining rows as data


def stats_numeric(data, num_vars):
    stats = {}
    for var in num_vars:
        idx = headers.index(var)  # Get index of the variable
        vals = [float(row[idx]) for row in data]  # Extract values
        n = len(vals)
        mean = sum(vals) / n
        variance = sum((x - mean) ** 2 for x in vals) / n
        std_dev = math.sqrt(variance)
        mode = Counter(vals).most_common(1)[0][0]  # Most common value
        
        stats[var] = {
            'mean': mean,
            'min': min(vals),
            'max': max(vals),
            'median': vals[n // 2] if n % 2 else (vals[n // 2 - 1] + vals[n // 2]) / 2,
            'mode': mode,
            'variance': variance,
            'std_dev': std_dev
        }
    return stats



print("Numerical Variables:", num_vars)
print("Numerical Stats:", stats_numeric(data, num_vars))


Numerical Variables: ['Open', 'High', 'Low']
Numerical Stats: {'Open': {'mean': 533.7098330683621, 'min': 279.12, 'max': 816.68, 'median': 581.9549999999999, 'mode': 306.35, 'variance': 23056.61689862078, 'std_dev': 151.8440545382689}, 'High': {'mean': 537.8802225755155, 'min': 281.21, 'max': 816.68, 'median': 584.115, 'mode': 401.99, 'variance': 23393.085951460773, 'std_dev': 152.9479844635449}, 'Low': {'mean': 529.0074085850564, 'min': 277.22, 'max': 805.14, 'median': 578.665, 'mode': 309.46, 'variance': 22648.130191694778, 'std_dev': 150.49295728270735}}
