In [1]:
# ============================================
# Milestone 3: Predictive Modeling (Regression)
# ============================================

# 1. Import required libraries
import os

import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# 2. Load Milestone 2 feature-engineered dataset
# The CSV location can be overridden through the EASYVISA_DATA_PATH environment
# variable so the notebook is not tied to a single machine. When the variable
# is unset, the original local path is used, so existing runs are unaffected.
DATA_PATH = os.environ.get(
    "EASYVISA_DATA_PATH",
    r"C:\Users\MEGHAMALA\Downloads\EasyVisa_Milestone2_Final.csv",
)
df = pd.read_csv(DATA_PATH)

print("Dataset loaded successfully")
print("Shape:", df.shape)

# 3. Separate features (X) and target (y)
# The target column is pulled out first; every remaining column is kept as a feature.
y = df["processing_time_days"]
X = df.drop(columns=["processing_time_days"])

# 4. Split data into training and testing sets (80%-20%)
# A fixed random_state keeps the partition identical from run to run.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

print("Train-test split completed")

# 5. Train Linear Regression model
# fit() hands back the fitted estimator, so construction and training are chained.
lr = LinearRegression().fit(X_train, y_train)
y_pred_lr = lr.predict(X_test)

# 6. Train Random Forest Regressor
# The seed is pinned so the forest is reproducible across runs.
rf = RandomForestRegressor(random_state=42).fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)

# 7. Train Gradient Boosting Regressor
# Same seed as the forest, again for reproducibility.
gb = GradientBoostingRegressor(random_state=42).fit(X_train, y_train)
y_pred_gb = gb.predict(X_test)

# 8. Define evaluation function
def evaluate_model(y_true, y_pred, model_name):
    """Print and return regression metrics for one model's predictions.

    Parameters
    ----------
    y_true : array-like
        Observed target values.
    y_pred : array-like
        Predicted target values, aligned element-wise with ``y_true``.
    model_name : str
        Label used in the header line of the printed report.

    Returns
    -------
    dict
        ``{"MAE": ..., "RMSE": ..., "R2": ...}`` so that results from several
        models can be collected and compared. Callers that only want the
        printed report can ignore the return value.
    """
    mae = mean_absolute_error(y_true, y_pred)
    # RMSE is derived as the square root of the mean squared error.
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    r2 = r2_score(y_true, y_pred)

    print(f"\n{model_name} Performance:")
    print("MAE :", mae)
    print("RMSE:", rmse)
    print("R2  :", r2)

    return {"MAE": mae, "RMSE": rmse, "R2": r2}

# 9. Evaluate all models
# Report test-set metrics for each model, in the same order the models were trained.
for predictions, label in (
    (y_pred_lr, "Linear Regression"),
    (y_pred_rf, "Random Forest Regressor"),
    (y_pred_gb, "Gradient Boosting Regressor"),
):
    evaluate_model(y_test, predictions, label)

print("\nMilestone 3 completed successfully!")
print("Models trained and evaluated: Linear Regression, Random Forest, Gradient Boosting")



Dataset loaded successfully
Shape: (25480, 25)
Train-test split completed

Linear Regression Performance:
MAE : 4.777601889645512
RMSE: 6.132588775049916
R2  : 0.79781754571608

Random Forest Regressor Performance:
MAE : 4.470514128728414
RMSE: 5.826396680037793
R2  : 0.8175029386757373

Gradient Boosting Regressor Performance:
MAE : 4.427718394902147
RMSE: 5.706954226025471
R2  : 0.8249087049232288

Milestone 3 completed successfully!
Models trained and evaluated: Linear Regression, Random Forest, Gradient Boosting
