In [None]:
# Importing necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
import xgboost as xgb
import lightgbm as lgb
from sklearn.metrics import mean_absolute_error, r2_score
import matplotlib.pyplot as plt
import seaborn as sns

# Read the training data from disk into a DataFrame.
# The path is relative to the notebook's working directory.
train_csv_path = 'data/train.csv'
df = pd.read_csv(train_csv_path)

# Preview the top rows as a quick sanity check (5 is the default for head())
df.head(5)


In [None]:
# Column the models should learn to predict. It is declared once so the
# feature/target split below cannot drift out of sync.
TARGET_COLUMN = 'target_column'  # Replace 'target_column' with the actual column name

# Fail early with an actionable message instead of the generic pandas error
# that df.drop() raises when the placeholder above has not been replaced.
if TARGET_COLUMN not in df.columns:
    raise KeyError(
        f"Target column {TARGET_COLUMN!r} not found in the dataset; "
        f"available columns: {list(df.columns)}"
    )

# Handling missing values: drop every row that contains at least one NaN
df = df.dropna()

# dropna() can remove every row; without this guard the failure would only
# surface later as a less obvious ValueError inside train_test_split.
if df.empty:
    raise ValueError(
        "No rows left after dropping missing values; cannot split the dataset."
    )

# Split the dataset into features (X) and target (y)
# NOTE(review): the estimators fitted later in this notebook are given X as-is —
# confirm it contains only numeric columns, or encode categoricals first.
X = df.drop(columns=TARGET_COLUMN)
y = df[TARGET_COLUMN]

# Split the data into training and testing sets (80% train, 20% test).
# A fixed random_state keeps the split reproducible across re-runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Check shapes of resulting data
print("Training data shape:", X_train.shape)
print("Testing data shape:", X_test.shape)


In [None]:
def fit_and_evaluate(model, X_train, y_train, X_test, y_test):
    """Fit ``model`` on the training split and score it on the test split.

    Parameters
    ----------
    model : estimator exposing ``fit(X, y)`` and ``predict(X)``.
    X_train, y_train : features and target used for fitting.
    X_test, y_test : features and target used for evaluation.

    Returns
    -------
    tuple
        ``(y_pred, mae, r2)`` — the test-set predictions, their mean absolute
        error against ``y_test``, and their R^2 score against ``y_test``.
    """
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    return y_pred, mean_absolute_error(y_test, y_pred), r2_score(y_test, y_pred)


# Initialize models (the stochastic ones use a fixed seed so results are reproducible)
# NOTE(review): `xgboost` and `lightgbm` below are estimator instances, not the
# packages (those are imported as `xgb` / `lgb`); the names are kept unchanged
# in case other cells refer to them.
linear_regression = LinearRegression()
random_forest = RandomForestRegressor(n_estimators=100, random_state=42)
xgboost = xgb.XGBRegressor(n_estimators=100, random_state=42)
lightgbm = lgb.LGBMRegressor(n_estimators=100, random_state=42)

# Fit each model on the training data, predict on the test set and compute
# MAE / R^2. One helper call per model replaces four copies of the same steps;
# the result names are unchanged.
y_pred_lr, mae_lr, r2_lr = fit_and_evaluate(linear_regression, X_train, y_train, X_test, y_test)
y_pred_rf, mae_rf, r2_rf = fit_and_evaluate(random_forest, X_train, y_train, X_test, y_test)
y_pred_xgb, mae_xgb, r2_xgb = fit_and_evaluate(xgboost, X_train, y_train, X_test, y_test)
y_pred_lgb, mae_lgb, r2_lgb = fit_and_evaluate(lightgbm, X_train, y_train, X_test, y_test)

# Print the results
print(f"Linear Regression MAE: {mae_lr}, R^2: {r2_lr}")
print(f"Random Forest MAE: {mae_rf}, R^2: {r2_rf}")
print(f"XGBoost MAE: {mae_xgb}, R^2: {r2_xgb}")
print(f"LightGBM MAE: {mae_lgb}, R^2: {r2_lgb}")
