In [None]:
# import
import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LinearRegression
import sklearn.metrics as sm
from sklearn.metrics import r2_score

In [None]:
# Load the finalists CSV into a DataFrame. The path is relative, so it is
# resolved against the kernel's current working directory.
finalists_clean_df = pd.read_csv('Data/finalists_cleaned.csv')

In [None]:
# Preview the first few rows to verify the CSV loaded as expected.
# Leaving the frame as the cell's last expression (instead of print()) lets
# the notebook render it as a rich table rather than wrapped plain text.
finalists_clean_df.head()

In [None]:
# Show the (n_rows, n_columns) tuple of the loaded frame.
finalists_clean_df.shape

In [None]:
# Print the per-column summary (dtype and non-null count) to spot missing
# values and non-numeric columns before modelling.
finalists_clean_df.info()

In [None]:
# Feature matrix: every column except the target and the two columns the
# notebook treats as categorical ('country', 'style').
X = finalists_clean_df.drop(['final_place', 'country', 'style'], axis=1)

# Regression target.
y = finalists_clean_df['final_place']

# Names of the columns kept in X. They are all treated as numeric here;
# NOTE(review): nothing in this cell checks their dtypes — confirm via .info().
numeric_features = X.columns.to_list()


In [None]:
y.head()

In [None]:
# Hold out 26% of the rows for evaluation (74/26 split). The fixed
# random_state makes the shuffle, and therefore the split, reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.26,
    random_state=42,
)


In [None]:
# Sanity-check the split sizes, printed in the order:
# X_train, y_train, X_test, y_test.
for split_part in (X_train, y_train, X_test, y_test):
    print(split_part.shape)

In [None]:
# Imputation and regression are chained in one estimator, so the imputer's
# statistics are learned in fit() and reused unchanged in predict().
#
# The imputation strategy is stated explicitly. `fill_value` is only
# consulted when strategy='constant', so passing `fill_value=None` (its
# default) had no effect and hid the fact that missing values are replaced
# by the column mean, which is scikit-learn's default strategy.
model = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('regressor', LinearRegression())
])

In [None]:
# Fit the whole pipeline (imputer first, then regressor) on the training
# split only; prediction on the test split happens in a later cell.
model.fit(X_train, y_train)


In [None]:
# Pull the 'regressor' step (the LinearRegression instance) out of the
# pipeline so its intercept and coefficients can be inspected directly.
reg = model.named_steps['regressor']


In [None]:
# Raw model parameters first: the scalar intercept, then the weight vector.
print("Intercept:", reg.intercept_)
print("Coefficients:", reg.coef_)

# Pair each training column with its weight (column order matches the order
# the regressor saw during fit) and print one aligned row per feature:
# name padded to 30 characters, weight right-aligned with 4 decimals.
feature_names = X_train.columns
coef_table = [(name, coef) for name, coef in zip(feature_names, reg.coef_)]
for name, coef in coef_table:
    print(f"{name:30} {coef:>10.4f}")


In [None]:
# Predict on the held-out features. The pipeline runs its imputer step on
# X_test before handing the result to the regressor.
y_pred = model.predict(X_test)

In [None]:
# Coefficient of determination on the test split. Keyword arguments make the
# (true, predicted) order explicit, since swapping them changes the score.
print("R² Score:", r2_score(y_true=y_test, y_pred=y_pred))

In [None]:
# Display the raw predictions array for the test rows.
y_pred

In [None]:
# Predicted-vs-actual scatter for the test split, drawn through the explicit
# Figure/Axes interface.
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(y_test, y_pred, color='blue', alpha=0.6)

# Dashed red diagonal = perfect prediction (predicted == actual), spanning
# the range of the actual values.
actual_range = [y_test.min(), y_test.max()]
ax.plot(actual_range, actual_range, 'r--')  # Diagonal

ax.set_xlabel('Actual Final Place')
ax.set_ylabel('Predicted Final Place')
ax.set_title('Predicted vs Actual Final Place')
ax.grid(True)
plt.show()

In [None]:
# Actual target value of the first test row. .iloc is positional, which is
# needed because y_test keeps the original (shuffled) row labels.
y_test.iloc[0]

In [None]:
# Model prediction at position 0 of y_pred, i.e. for the first test row.
y_pred[0]