In [1]:
#import the libraries
import pandas as pd

In [2]:
#import the required algorithms
import os

import lightgbm as lgb
import xgboost as xgb
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import (
LinearRegression,Ridge,Lasso,ElasticNet,SGDRegressor,HuberRegressor
)
from sklearn.metrics import mean_squared_error,mean_absolute_error,r2_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures,StandardScaler
from sklearn.svm import SVR

In [3]:
#import the pickle
import pickle

In [4]:
#Load the data set
# The CSV location can be overridden with the USA_HOUSING_CSV environment
# variable so the notebook is not tied to one machine; when the variable is
# unset, the original machine-specific path is used unchanged.
DATA_PATH=os.environ.get(
    'USA_HOUSING_CSV',
    r"C:\Users\Meghana\OneDrive\Desktop\desktop\data science\datasets given by sir\USA_Housing.csv",
)
data=pd.read_csv(DATA_PATH)

In [5]:
# Separate the target from the predictors: 'Price' is the dependent variable,
# and the feature matrix is every remaining column except 'Address'.
y=data['Price']
x=data.drop(columns=['Price','Address'])

In [6]:
# Hold out 20% of the rows as a test set; the fixed seed keeps the split repeatable
x_train,x_test,y_train,y_test=train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=0,
)

In [7]:
#Define the models
# One seed shared by every stochastic estimator so that a fresh
# "Restart & Run All" reproduces the same fitted models and metrics.
RANDOM_STATE=0


def _scaled(estimator):
    """Return a pipeline that standardizes the features before fitting `estimator`.

    Used for estimators that scikit-learn documents as sensitive to feature
    scale (gradient-descent, kernel, neural-network and distance-based models).
    The scaler is fit on the training data only, as part of `fit`.
    """
    return Pipeline([
        ('scaler',StandardScaler()),
        ('model',estimator)
    ])


# Maps a display name (also used as the pickle file stem) to an unfitted estimator.
models={
    'LinearRegression':LinearRegression(),
    'RobustRegression':HuberRegressor(),
    'RidgeRegression':Ridge(),
    'ElasticNet': ElasticNet(),
    'LassoRegression':Lasso(),
    'PolynomialRegression':Pipeline([
        ('poly',PolynomialFeatures(degree=2)),
        ('linear',LinearRegression())
    ]),
    # Scale-sensitive estimators are wrapped so they see standardized features.
    'SGDRegressor':_scaled(SGDRegressor(random_state=RANDOM_STATE)),
    'ANN':_scaled(
        MLPRegressor(hidden_layer_sizes=(100,),max_iter=1000,random_state=RANDOM_STATE)
    ),
    'RandomForest':RandomForestRegressor(random_state=RANDOM_STATE),
    'SVM':_scaled(SVR()),
    'LGBM':lgb.LGBMRegressor(random_state=RANDOM_STATE),
    # NOTE(review): XGBRFRegressor is XGBoost's random-forest variant; if plain
    # gradient boosting was intended for this entry, use xgb.XGBRegressor - confirm.
    'XGBoost':xgb.XGBRFRegressor(random_state=RANDOM_STATE),
    'KNN':_scaled(KNeighborsRegressor())
}

In [9]:
# Fit every candidate on the training split, score it on the test split,
# record the metrics, and persist the fitted estimator as '<name>.pkl'.
results=[]

# Column label -> metric function; insertion order fixes the column order
# of each result row (MAE, then MSE, then R2).
metric_fns={
    'MAE':mean_absolute_error,
    'MSE':mean_squared_error,
    'R2':r2_score
}

for name,model in models.items():
    model.fit(x_train,y_train)
    y_pred=model.predict(x_test)

    row={'Model':name}
    for label,metric in metric_fns.items():
        row[label]=metric(y_test,y_pred)
    results.append(row)

    # Save the fitted model next to the notebook, one file per model name
    with open(f'{name}.pkl','wb') as f:
        pickle.dump(model,f)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000338 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1256
[LightGBM] [Info] Number of data points in the train set: 4000, number of used features: 5
[LightGBM] [Info] Start training from score 1231911.452183


In [11]:
# Gather the per-model metric rows into one table and write it out as CSV
# (the DataFrame index is not written to the file).
results_csv='model evaluation results.csv'
results_df=pd.DataFrame(data=results)
results_df.to_csv(results_csv,index=False)

## Models have been trained and saved as pickle files. Evaluation results have been saved as a CSV file.