In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_absolute_error, r2_score


In [16]:
# Load the raw MPG table from the Colab working directory.
data = pd.read_csv(
    filepath_or_buffer='/content/MPG.csv',
)


In [17]:
# Features are every column except the target (`mpg`) and the `name` column.
# `Unnamed: 0` is also removed: pandas assigns that label to a header-less
# first column (presumably a row index written out with the CSV), and a row
# counter is not a meaningful predictor. `errors='ignore'` keeps this cell
# working if the file is ever loaded without that column.
X = (
    data
    .drop(columns=['mpg', 'name'])
    .drop(columns=['Unnamed: 0'], errors='ignore')
)

# Regression target.
y = data['mpg']

In [18]:
# Count missing values per feature column (displayed as the cell output).
X.isnull().sum(axis=0)

Unnamed: 0,0
cylinders,0
displacement,0
horsepower,6
weight,0
acceleration,0
model_year,0
origin,0


In [20]:
# Fill missing numeric values with the column mean. `SimpleImputer` is already
# imported in the notebook's first cell, so it is not re-imported here.
#
# NOTE(review): the imputer is fitted on the full feature frame, before the
# train/test split further down, so held-out rows contribute to the fill
# value. Fitting on the training rows only would avoid that leakage.

# Select every numeric column regardless of bit width (int32, float32, ...),
# not just float64/int64.
numeric_cols = X.select_dtypes(include='number').columns

# Mean imputation; columns without missing values come back unchanged in value.
imputer = SimpleImputer(strategy='mean')

# Overwrite only the numeric columns; non-numeric columns are left as they are.
X[numeric_cols] = imputer.fit_transform(X[numeric_cols])

In [21]:
# One-hot encode `origin`; the first level is dropped so the remaining
# indicator columns are not perfectly collinear.
# Note: this cell consumes the `origin` column, so it cannot be re-run on its
# own without rebuilding X first.
X = pd.get_dummies(
    data=X,
    columns=['origin'],
    drop_first=True,
)

In [22]:
from sklearn.preprocessing import StandardScaler

In [23]:
# Standardise every feature column (centre on the mean, scale by the standard
# deviation). From here on X holds the scaler's output rather than the
# original frame.
#
# NOTE(review): the scaler is fitted on all rows, and the train/test split
# happens in a later cell, so test-set statistics influence the scaling.
ss = StandardScaler(with_mean=True, with_std=True)
X = ss.fit_transform(X)

In [24]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the
# partition reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)


In [25]:
# Candidate regressors, keyed by the label printed in the comparison report.
# Insertion order is the order in which they are trained. The two ensemble
# models have their seed pinned.
models = dict([
    ('Linear Regression', LinearRegression()),
    ('Random Forest', RandomForestRegressor(n_estimators=100, random_state=42)),
    ('Gradient Boosting', GradientBoostingRegressor(n_estimators=100, random_state=42)),
    ('Support Vector Regression (SVR)', SVR()),
    ('K-Nearest Neighbors', KNeighborsRegressor(n_neighbors=5)),
])

In [26]:
def evaluate_model(model, X_train, X_test, y_train, y_test):
    """Fit ``model`` on the training split and score it on the test split.

    The estimator is fitted in place, so ``model`` is trained once this
    function returns.

    Args:
        model: Object exposing ``fit(X, y)`` and ``predict(X)``.
        X_train: Features used for fitting.
        X_test: Features used for prediction.
        y_train: Targets used for fitting.
        y_test: Targets the predictions are compared against.

    Returns:
        tuple: ``(mae, r2)`` - the mean absolute error and the R^2 score of
        the test-set predictions, in that order.
    """
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    # Both metrics take (true values, predicted values), in that order.
    scores = (
        mean_absolute_error(y_test, predictions),
        r2_score(y_test, predictions),
    )
    return scores


In [27]:
# Train each candidate in turn and report its hold-out MAE and R^2.
for name in models:
    model = models[name]
    print(f"Training {name}...")
    mae, r2 = evaluate_model(model, X_train, X_test, y_train, y_test)
    print(f"{name} - MAE: {mae:.4f}, R^2: {r2:.4f}")


Training Linear Regression...
Linear Regression - MAE: 2.2876, R^2: 0.8449
Training Random Forest...
Random Forest - MAE: 1.5989, R^2: 0.9128
Training Gradient Boosting...
Gradient Boosting - MAE: 1.7352, R^2: 0.9022
Training Support Vector Regression (SVR)...
Support Vector Regression (SVR) - MAE: 1.8140, R^2: 0.8743
Training K-Nearest Neighbors...
K-Nearest Neighbors - MAE: 1.8078, R^2: 0.9069
