<a href="https://colab.research.google.com/github/AndreGulyi/ML_projects/blob/main/AG_Predicting__rice.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import xgboost
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
import sklearn
from sklearn import tree
from sklearn import svm
from sklearn import neighbors
from sklearn import linear_model
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor


In [None]:
# Input CSV location. The path sits under /content/drive, so it presumably
# requires Google Drive to be mounted in Colab first -- TODO confirm.
DATA_PATH = "/content/drive/MyDrive/ML/AG.Projects/Predicting Price/sample.csv"
df = pd.read_csv(DATA_PATH)

In [None]:
# Print a summary of the frame: column names, dtypes and non-null counts.
df.info()

In [None]:
# Descriptive statistics (count, mean, std, quartiles, ...) for the numeric columns.
df.describe()

In [None]:
# Show the first ten rows to eyeball the raw values.
df.head(10)

Data Manipulation

In [None]:
# Frequency of each distinct `loc1` value, most common first.
df["loc1"].value_counts()

In [None]:
# Frequency of each distinct `loc2` value, most common first.
df["loc2"].value_counts()

In [None]:
# Drop every row whose `loc1` value contains "S" or "T". One character-class
# pattern replaces the two separate `== False` comparisons.
# na=True makes a missing `loc1` count as a match, so those rows are removed
# too -- the same outcome the `== False` comparisons produced for NaN.
has_letter_code = df["loc1"].str.contains("[ST]", na=True)
# .copy() yields an independent frame, so later column assignments on `df`
# do not write into a filtered slice (SettingWithCopyWarning).
df = df[~has_letter_code].copy()
df.shape

In [None]:
# Convert both location columns to numbers; errors="coerce" turns any value
# that cannot be parsed into NaN instead of raising. assign() builds a new
# frame rather than setting columns on the filtered slice produced by the
# previous cell, which is what triggers SettingWithCopyWarning.
df = df.assign(
    loc1=pd.to_numeric(df["loc1"], errors="coerce"),
    loc2=pd.to_numeric(df["loc2"], errors="coerce"),
)
# Remove every row with a missing value in any column (including NaNs just
# produced by the coercion). Rebinding instead of inplace=True keeps the
# step free of in-place mutation.
df = df.dropna()
df.shape

Data Type Changing: one-hot encoding the day-of-week column (`dow`)

In [None]:
# One indicator column per distinct value found in the `dow` column.
days_dummies = pd.get_dummies(df["dow"])
days_dummies.head()

In [None]:
# Same rows as `df` minus the raw `dow` column. drop() returns a new frame,
# so `df` itself is left untouched.
df2 = df.drop(columns=["dow"])

In [None]:
# Index-aligned left join: attach the `dow` indicator columns to the
# remaining columns of `df2`.
result = df2.join(days_dummies, how="left")
result.head()

Checking Outliers and Correlations

In [None]:
from pandas.plotting import scatter_matrix

# Pairwise scatter plots for the first seven columns of `result`. The return
# value is bound to `_` so the array of axes is not echoed as cell output.
first_seven_columns = result.iloc[:, :7]
_ = scatter_matrix(first_seven_columns, figsize=(12, 8))

In [None]:
# Correlation of every column with `price`, largest value first, rounded to
# two decimals.
price_corr = result.corr()["price"].to_frame()
price_corr.sort_values(by="price", ascending=False).round(2)

In [None]:
# Hard-coded feature subsets (presumably picked from the correlation table
# in the previous cell -- confirm).
three_best = ["para2", "para4", "para3"]
# The five-feature set is the three-feature set plus two further columns.
five_best = [*three_best, "para1", "Fri"]

# Machine Learning
Regression Models

In [None]:
# Candidate predictor matrices, keyed by a descriptive label. The full set
# is every column of `result` except the `price` target.
feature_sets = dict(
    full_dataset=result.drop(columns="price"),
    three_best=result[three_best],
    five_best=result[five_best],
)

In [None]:
# One seed shared by every estimator so repeated runs are comparable.
SEED = 8

# Regressors to compare, keyed by the label used when reporting results.
regression_models = {
    "Ridge": linear_model.Ridge(random_state=SEED),
    "DecisionTree": tree.DecisionTreeRegressor(random_state=SEED, max_depth=5),
    "RandomForest": RandomForestRegressor(random_state=SEED),
    "XGBoost": XGBRegressor(random_state=SEED),
    "LGMB": LGBMRegressor(random_state=SEED),
    "MLP": MLPRegressor(random_state=SEED),
}

In [None]:
def make_regression(x_train, y_train, x_test, y_test, model, model_name, verbose=True):
    """Fit ``model`` on the training split and score it on both splits.

    Args:
        x_train: Training features, passed to ``model.fit`` and ``model.predict``.
        y_train: Training targets.
        x_test: Held-out features, passed to ``model.predict``.
        y_test: Held-out targets.
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``. It is
            fitted in place; no copy is made.
        model_name: Label used only in the printed report.
        verbose: When true, print the rounded scores.

    Returns:
        Tuple ``(trained_model, y_predict, train_error, test_error, r2)``:
        the same ``model`` object after fitting, its predictions for
        ``x_train``, the RMSE on the training split, the RMSE on the test
        split, and the R^2 score on the training split.

    NOTE(review): the returned predictions and ``r2`` are computed on the
    *training* split, exactly as this function has always done. If a
    held-out R^2 was intended, score ``y_test`` instead -- confirm.
    """
    model.fit(x_train, y_train)

    train_predictions = model.predict(x_train)
    test_predictions = model.predict(x_test)

    # RMSE is computed as sqrt(MSE) instead of passing ``squared=False``:
    # that keyword was deprecated in scikit-learn 1.4 and removed in 1.6,
    # whereas this form works on every version.
    train_error = np.sqrt(mean_squared_error(y_train, train_predictions))
    test_error = np.sqrt(mean_squared_error(y_test, test_predictions))

    # The training predictions are reused here rather than predicted again.
    r2 = r2_score(y_train, train_predictions)

    if verbose:
        # Built-in round() handles both NumPy scalars and plain Python
        # floats; calling ``.round()`` fails on the latter.
        print("----Model name = {}-----".format(model_name))
        print("Train error = {}".format(round(train_error, 1)))
        print("Test error = {}".format(round(test_error, 1)))
        print("r2_score = {}".format(round(r2, 2)))
        print("--------------------------------")

    trained_model = model

    return trained_model, train_predictions, train_error, test_error, r2

In [None]:
# Accumulator for the comparison results: one initially empty list per
# column, each list a distinct object.
RESULT_COLUMNS = ("regression_model", "feature_set", "Train Error", "Test Error", "R2")
pred_dict = {column: [] for column in RESULT_COLUMNS}

In [None]:
# The target is the same for every run, so it is looked up once.
y = result["price"]

for feature_set_name, feature_set in feature_sets.items():
    print("Included columns are {}".format(feature_set_name))

    # The split depends only on the feature set, not on the model, so it is
    # done once per feature set instead of once per model. The fixed
    # random_state yields the same partition either way.
    x = feature_set
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.15, random_state=8)

    for model_name, model in regression_models.items():
        trained_model, y_predict, train_error, test_error, r2 = make_regression(
            x_train, y_train, x_test, y_test, model, model_name, verbose=True
        )

        # One entry per (feature set, model) run, appended to every column list.
        pred_dict["regression_model"].append(model_name)
        pred_dict["feature_set"].append(feature_set_name)
        pred_dict["Train Error"].append(train_error)
        pred_dict["Test Error"].append(test_error)
        pred_dict["R2"].append(r2)

In [None]:
# Turn the collected column lists into a table and preview its first five rows.
pred_df = pd.DataFrame(data=pred_dict)
pred_df.head()