In [3]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

In [10]:
def _hour_traffic_pairs(df):
    """Return ``(hours, traffic)`` arrays for every non-missing hourly cell of ``df``.

    Hour columns are recognised by name: the text before the first underscore
    must be a plain number (e.g. ``'00_01'`` -> hour 0). Other columns are
    ignored.
    """
    # Requiring a numeric prefix keeps unrelated columns that merely contain
    # an underscore (e.g. "station_id") from reaching int() and raising.
    hour_cols = [
        col for col in df.columns
        if '_' in col and col.split('_')[0].strip().isdecimal()
    ]
    hours = np.array([int(col.split('_')[0]) for col in hour_cols], dtype=float)
    values = df[hour_cols].to_numpy(dtype=float)

    # True where a cell actually holds a measurement (empty CSV cells are NaN).
    present = ~np.isnan(values)
    # nonzero() scans the mask row by row, so the pairs come out in the same
    # order a nested "for row / for column" loop would produce.
    col_idx = np.nonzero(present)[1]
    return hours[col_idx], values[present]


def trafficRegression(filename, D):
    """Fit a degree-``D`` polynomial of traffic against hour and print it.

    Reads the semicolon-separated CSV ``filename``, pairs each non-missing
    value in an hour column with that column's starting hour, and fits an
    ordinary least-squares polynomial through all pairs.

    Parameters
    ----------
    filename : str or path-like
        CSV file with ``;`` as separator and hour columns named ``HH_HH``.
    D : int
        Degree of the polynomial.

    Returns
    -------
    None
        The coefficients are printed on one line, highest degree first and
        the intercept last, each formatted with three decimals.

    Raises
    ------
    ValueError
        If the file contains no usable hour/traffic pairs.
    """
    df = pd.read_csv(filename, sep=';')

    hours, traffic = _hour_traffic_pairs(df)
    if hours.size == 0:
        # Fail with a clear message instead of an indexing error further down.
        raise ValueError(f"no hourly traffic values found in {filename!r}")

    # Expand the single hour feature into the polynomial terms up to degree D.
    poly = PolynomialFeatures(degree=D)
    X_poly = poly.fit_transform(hours.reshape(-1, 1))

    model = LinearRegression()
    model.fit(X_poly, traffic)

    # The first coefficient belongs to the x^0 column; the constant term is
    # reported through intercept_ instead, so that entry is skipped.
    coef = model.coef_[1:]
    intercept = model.intercept_

    # Highest degree first, intercept last.
    result = ' '.join(f"{x:.3f}" for x in np.concatenate([coef[::-1], [intercept]]))
    print(result)

In [11]:
# Fit a degree-4 polynomial to the hourly values in liikenne1.csv;
# the function prints the coefficients (highest degree first, intercept last).
trafficRegression("liikenne1.csv", 4)

0.109 -6.815 111.812 -306.226 340.916
