In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import ExtraTreesRegressor

class Script:
    """Train several scikit-learn regressors on one CSV dataset and compare them.

    The dataset must provide an ``x`` column (the single input feature) and a
    ``y`` column (the target); both names are hard-coded in
    ``Independent_and_Dependent_Feature``. Every model is scored with R²
    multiplied by 100 on a held-out 20% split.
    """

    def __init__(self, dataset_name):
        """Store the CSV location; nothing is read until ``read_file`` runs.

        Args:
            dataset_name: Anything ``pandas.read_csv`` accepts as its first
                argument (typically a path string).
        """
        self.dataset_name = dataset_name

    def read_file(self):
        """Load ``self.dataset_name`` with ``pandas.read_csv``.

        Returns:
            The parsed ``DataFrame``.
        """
        return pd.read_csv(self.dataset_name)

    def Independent_and_Dependent_Feature(self, df):
        """Split ``df`` into the feature matrix and the target.

        Args:
            df: DataFrame containing the columns ``x`` and ``y``.

        Returns:
            ``(X, y)`` where ``X`` is the one-column DataFrame ``df[['x']]``
            and ``y`` is the Series ``df['y']``.

        Raises:
            KeyError: If either column is missing.
        """
        # Double brackets keep X two-dimensional, as the estimators require.
        X = df[['x']]
        y = df['y']
        return X, y

    def train_test_split(self, X, y):
        """Split ``X``/``y`` 80/20 with a fixed seed (``random_state=10``).

        Returns:
            ``(X_train, X_test, y_train, y_test)``.
        """
        # The bare name below is the module-level scikit-learn function, not
        # this method: method names live in the class namespace, which is not
        # searched when a name is looked up inside a function body.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.20, random_state=10
        )
        return X_train, X_test, y_train, y_test

    def _fit_and_score(self, regressor, X_train, X_test, y_train, y_test):
        """Fit ``regressor`` on the training split and score it on the test split.

        Returns:
            ``(regressor, score)`` where ``score`` is ``r2_score * 100``. The
            value can be negative when the model does worse than predicting
            the mean of ``y_test``.
        """
        regressor.fit(X_train, y_train)
        predictions = regressor.predict(X_test)
        return regressor, r2_score(y_test, predictions) * 100

    def linear_regression(self, X_train, X_test, y_train, y_test):
        """Fit ``LinearRegression``; return ``(model, R² * 100)``."""
        return self._fit_and_score(
            LinearRegression(), X_train, X_test, y_train, y_test
        )

    def KNN(self, X_train, X_test, y_train, y_test):
        """Fit ``KNeighborsRegressor(n_neighbors=5)``; return ``(model, R² * 100)``."""
        return self._fit_and_score(
            KNeighborsRegressor(n_neighbors=5), X_train, X_test, y_train, y_test
        )

    def decision_tree(self, X_train, X_test, y_train, y_test):
        """Fit ``DecisionTreeRegressor(random_state=0)``; return ``(model, R² * 100)``."""
        return self._fit_and_score(
            DecisionTreeRegressor(random_state=0),
            X_train, X_test, y_train, y_test,
        )

    def SVR(self, X_train, X_test, y_train, y_test):
        """Fit a default ``sklearn.svm.SVR``; return ``(model, R² * 100)``."""
        # ``SVR()`` here is the imported scikit-learn class, not this method
        # (same name-lookup rule as in ``train_test_split``).
        return self._fit_and_score(SVR(), X_train, X_test, y_train, y_test)

    def random_forest(self, X_train, X_test, y_train, y_test):
        """Fit a default ``RandomForestRegressor``; return ``(model, R² * 100)``.

        No ``random_state`` is passed, so the score may differ between runs.
        """
        return self._fit_and_score(
            RandomForestRegressor(), X_train, X_test, y_train, y_test
        )

    def extra_tree_regressor(self, X_train, X_test, y_train, y_test):
        """Fit a default ``ExtraTreesRegressor``; return ``(model, R² * 100)``.

        No ``random_state`` is passed, so the score may differ between runs.
        """
        return self._fit_and_score(
            ExtraTreesRegressor(), X_train, X_test, y_train, y_test
        )

    def model_training(self):
        """Run the whole pipeline and print a comparison of the six models.

        Reads the CSV, splits it, trains every regressor, prints a table of
        the scores, the highest score, and the extra-trees prediction for
        the input value 50. Returns ``None``.
        """
        # Go through ``self`` (not ``Script.method(self, ...)``) so that
        # subclasses overriding any step are honoured.
        df = self.read_file()
        X, y = self.Independent_and_Dependent_Feature(df)
        splits = self.train_test_split(X, y)

        # (label shown in the table, trainer) in the order they are trained.
        trainers = [
            ('lin_regressor_score', self.linear_regression),
            ('knn_regressor_score', self.KNN),
            ('decisiontree_regressor_score', self.decision_tree),
            ('sv_regressor_score', self.SVR),
            ('randomforest_regressor_score', self.random_forest),
            ('extratree_regressor_score', self.extra_tree_regressor),
        ]
        labels = []
        scores = []
        extratree_regressor = None
        for label, trainer in trainers:
            model, score = trainer(*splits)
            labels.append(label)
            scores.append(score)
            if label == 'extratree_regressor_score':
                extratree_regressor = model

        ## creating data frame of all r2 scores
        df1 = pd.DataFrame({'ML Algo': labels, 'Accuracy': scores})
        print(df1)
        print("Maximum Accuracy:",max(df1['Accuracy']))

        # Wrap the query in a DataFrame carrying the training column name(s);
        # a bare nested list makes scikit-learn warn that X has no valid
        # feature names because the model was fitted on a DataFrame.
        # NOTE(review): the prediction always comes from the extra-trees
        # model, whichever model scored highest - confirm this is intended.
        query = pd.DataFrame([[50]], columns=X.columns)
        print("Prediction for value 50 is ",extratree_regressor.predict(query))
# Dataset path, resolved relative to the current working directory.
cls = Script(dataset_name='scr-dataset.csv')
cls.model_training()
    

                        ML Algo   Accuracy
0           lin_regressor_score  -0.829650
1           knn_regressor_score  98.233441
2  decisiontree_regressor_score  96.956604
3            sv_regressor_score  -2.558899
4  randomforest_regressor_score  98.733286
5     extratree_regressor_score  99.881014
Maximum Accuracy: 99.88101375685918
Prediction for value 50 is  [1.7594026]
