In [4]:
import pandas as pd
import numpy as np

In [3]:
from sklearn.datasets import load_breast_cancer
from sklearn.datasets import load_boston
from warnings import filterwarnings
filterwarnings('ignore')

In [8]:
class data_preprocessing:
    def __init__(self,data):
        self.data = data
        from warnings import filterwarnings
        filterwarnings("ignore")
        self.objects=data_preprocessing.initialize()
        self.input = None
        self.output = None
        self.features = list(data.columns)
        self.output_name = None
        self.train_features,self.train_target,self.test_target,self.test_features = None,None,None,None
    def drop_null(self):
        self.data.dropna(axis=0,inplace=True)
    def initialize():
        from sklearn import preprocessing,model_selection,decomposition
        return {
                'Standard scaler':preprocessing.StandardScaler,
                'Min Max Scalar':preprocessing.MinMaxScaler,
                'PCA':decomposition.PCA,
                'train test split':model_selection.train_test_split,
               }
    def out_in(self,output_name):
        self.input = self.data.drop(output_name,axis=1)
        self.output = self.data[output_name]
        self.features.remove(output_name)
        self.output_name = output_name
    def apply_count_vectorize(self,col,count_vect_obj=None):
        if count_vect_obj ==None:
            from sklearn.feature_extraction.text import CountVectorizer
            self.objects['Countvec_'+col] = CountVectorizer()
            self.data[col] = self.objects['Countvec_'+col].fit_transform(data[col])
        else:
            self.objects['Countvec_'+col] = count_vect_obj
            self.data[col] = self.objects['Countvec_'+col].fit_transform(data[col])
    def split(self,test_percent,rs = 42):
         self.train_features,self.test_features,self.train_target,self.test_target = self.objects['train test split'](self.input,self.output,test_size=test_percent,random_state=rs)
    def get_object_column(self):
        import numpy as np
        edit_col = [i for i in self.features if self.data[i].dtype == np.object]
        return edit_col
    def encode_categorical_columns(self):
        import numpy as np
        from sklearn.preprocessing import LabelEncoder
        label_encoder_objects ={}
        edit_columns = self.get_object_column()
        for col in edit_columns:
            label_object = LabelEncoder()
            self.data[col]=label_object.fit_transform(self.data[col])
            label_encoder_objects[col+"_encoder_object"] = label_object
        self.objects['Label_Encoder'] = label_encoder_objects
    def change_columns(self,columns):
        self.data = self.data[columns]
    def apply_smote_data(self):
        from imblearn.over_sampling import SMOTE
        smote_object = SMOTE()
        self.train_features,self.train_target = smote_object.fit_resample(self.train_features,self.train_target)
        self.objects['Smote object'] = smote_object
    def standardize_or_normalize(self,scale_type=None):
        if scale_type == "Standard":
            from pandas import DataFrame as df
            scale_object  = self.objects['Standard scaler']()
            self.train_features=df(data = scale_object.fit_transform(self.train_features),columns = self.features)
            self.test_features = df(data = scale_object.fit_transform(self.test_features),columns = self.features)
        elif scale_type == "Normalize":
            from pandas import DataFrame as df
            scale_object  = self.objects['Min Max Scalar']()
            self.train_features=df(data = scale_object.fit_transform(self.train_features),columns = self.features)
            self.test_features = df(data = scale_object.fit_transform(self.test_features),columns = self.features)

In [73]:
class machine_learning_classification:
    def __init__(self,data_pr,models=None):
        self.data = data_pr.data
        self.train_features = data_pr.train_features
        self.train_target = data_pr.train_target
        self.test_features = data_pr.test_features
        self.test_target = data_pr.test_target
        if models==None:
            from sklearn.linear_model import LogisticRegression
            from sklearn.tree import DecisionTreeClassifier
            from sklearn.ensemble import RandomForestClassifier
            from sklearn.naive_bayes import BernoulliNB,GaussianNB
            from sklearn.neighbors import KNeighborsClassifier
            models = [LogisticRegression(),DecisionTreeClassifier(),RandomForestClassifier(),BernoulliNB(),GaussianNB(),KNeighborsClassifier()]
        self.model_evaluvation_dict = {str(i).replace("()",""):{'model_object':i} for i in models}
        self.model_prediction = {str(i).replace("()",""):None for i in models}
    def fit(self):
        for model,dic in self.model_evaluvation_dict.items():
            self.model_evaluvation_dict[model]['model_object'].fit(self.train_features,self.train_target)
            self.model_prediction[model] = self.model_evaluvation_dict[model]['model_object'].predict(self.test_features)
    def Score_test_data(self):
        for model,dic in self.model_evaluvation_dict.items():
            self.model_evaluvation_dict[model]['score on test data'] = self.model_evaluvation_dict[model]['model_object'].score(self.test_features,self.test_target)*100
    def create_confusion_matrix(self):
        from sklearn.metrics import confusion_matrix
        for model,dic in self.model_evaluvation_dict.items():
            self.model_evaluvation_dict[model]['confusion matrix for test data'] = confusion_matrix(self.test_target,self.model_prediction[model]).tolist()
    def create_f1_precision_recall(self):
        from sklearn.metrics import f1_score,recall_score,precision_score
        for model,dic in self.model_evaluvation_dict.items():
            self.model_evaluvation_dict[model]['f1 score for test data'] = f1_score(self.test_target,self.model_prediction[model],average='macro')*100
            self.model_evaluvation_dict[model]['precision for test data'] = precision_score(self.test_target,self.model_prediction[model],average='macro')*100
            self.model_evaluvation_dict[model]['recall for test data'] = recall_score(self.test_target,self.model_prediction[model],average='macro')*100
    def evaluvate(self):
        self.fit()
        self.Score_test_data()
        self.create_confusion_matrix()
        self.create_f1_precision_recall()
        return self.model_evaluvation_dict

In [96]:
class machine_learning_regression:
    def __init__(self,data_pr,models=None):
        self.data = data_pr.data
        self.train_features = data_pr.train_features
        self.train_target = data_pr.train_target
        self.test_features = data_pr.test_features
        self.test_target = data_pr.test_target
        if models == None:
            from sklearn.linear_model import LinearRegression,Ridge,Lasso
            from sklearn.tree import DecisionTreeRegressor
            from sklearn.ensemble import RandomForestRegressor
            from sklearn.neighbors import KNeighborsRegressor
            models = [LinearRegression(),Ridge(),Lasso(),DecisionTreeRegressor(),RandomForestRegressor(),KNeighborsRegressor()]
        self.model_evaluvation_dict = {str(i).replace("()",""):{'model_object':i} for i in models}
        self.model_prediction = {str(i).replace("()",""):None for i in models}
    def fit(self):
        for model,dic in self.model_evaluvation_dict.items():
            self.model_evaluvation_dict[model]['model_object'].fit(self.train_features,self.train_target)
            self.model_prediction[model] = self.model_evaluvation_dict[model]['model_object'].predict(self.test_features)
    def Score_test_dataset(self):
        from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error,mean_absolute_percentage_error
        metrics = {'r2 score':r2_score,'MAE':mean_absolute_error,'MSE':mean_squared_error,'MAPE':mean_absolute_percentage_error}
        for model,dic in self.model_evaluvation_dict.items():
            for metric,obj in metrics.items():
                self.model_evaluvation_dict[model][metric] = obj(self.model_prediction[model],self.test_target)
    def evaluvate(self):
        self.fit()
        self.Score_test_dataset()
        return self.model_evaluvation_dict

In [107]:
from sklearn.datasets import load_breast_cancer
data = pd.DataFrame(data= load_breast_cancer().data,columns=load_breast_cancer().feature_names)
data['target'] = load_breast_cancer().target

In [76]:
data

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,target
0,17.99,10.38,122.80,1001.0,0.11840,0.27760,0.30010,0.14710,0.2419,0.07871,...,17.33,184.60,2019.0,0.16220,0.66560,0.7119,0.2654,0.4601,0.11890,0
1,20.57,17.77,132.90,1326.0,0.08474,0.07864,0.08690,0.07017,0.1812,0.05667,...,23.41,158.80,1956.0,0.12380,0.18660,0.2416,0.1860,0.2750,0.08902,0
2,19.69,21.25,130.00,1203.0,0.10960,0.15990,0.19740,0.12790,0.2069,0.05999,...,25.53,152.50,1709.0,0.14440,0.42450,0.4504,0.2430,0.3613,0.08758,0
3,11.42,20.38,77.58,386.1,0.14250,0.28390,0.24140,0.10520,0.2597,0.09744,...,26.50,98.87,567.7,0.20980,0.86630,0.6869,0.2575,0.6638,0.17300,0
4,20.29,14.34,135.10,1297.0,0.10030,0.13280,0.19800,0.10430,0.1809,0.05883,...,16.67,152.20,1575.0,0.13740,0.20500,0.4000,0.1625,0.2364,0.07678,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,21.56,22.39,142.00,1479.0,0.11100,0.11590,0.24390,0.13890,0.1726,0.05623,...,26.40,166.10,2027.0,0.14100,0.21130,0.4107,0.2216,0.2060,0.07115,0
565,20.13,28.25,131.20,1261.0,0.09780,0.10340,0.14400,0.09791,0.1752,0.05533,...,38.25,155.00,1731.0,0.11660,0.19220,0.3215,0.1628,0.2572,0.06637,0
566,16.60,28.08,108.30,858.1,0.08455,0.10230,0.09251,0.05302,0.1590,0.05648,...,34.12,126.70,1124.0,0.11390,0.30940,0.3403,0.1418,0.2218,0.07820,0
567,20.60,29.33,140.10,1265.0,0.11780,0.27700,0.35140,0.15200,0.2397,0.07016,...,39.42,184.60,1821.0,0.16500,0.86810,0.9387,0.2650,0.4087,0.12400,0


In [108]:
data_pre_object = data_preprocessing(data)

In [109]:
data_pre_object.drop_null()

In [110]:
data_pre_object.encode_categorical_columns()

In [111]:
data_pre_object.out_in('target')

In [112]:
data_pre_object.split(0.3)

In [113]:
data_pre_object.apply_smote_data()

In [114]:
data_pre_object.standardize_or_normalize("Standard")

In [84]:
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier

In [115]:
model_obj = machine_learning_classification(data_pre_object)

In [116]:
model_obj.evaluvate()

{'LogisticRegression': {'model_object': LogisticRegression(),
  'score on test data': 94.73684210526315,
  'confusion matrix for test data': [[62, 1], [8, 100]],
  'f1 score for test data': 94.46343130553656,
  'precision for test data': 93.79066478076379,
  'recall for test data': 95.5026455026455},
 'DecisionTreeClassifier': {'model_object': DecisionTreeClassifier(),
  'score on test data': 86.54970760233918,
  'confusion matrix for test data': [[62, 1], [22, 86]],
  'f1 score for test data': 86.27943485086342,
  'precision for test data': 86.33004926108374,
  'recall for test data': 89.02116402116403},
 'RandomForestClassifier': {'model_object': RandomForestClassifier(),
  'score on test data': 94.15204678362574,
  'confusion matrix for test data': [[63, 0], [10, 98]],
  'f1 score for test data': 93.8963449457453,
  'precision for test data': 93.15068493150685,
  'recall for test data': 95.37037037037037},
 'BernoulliNB': {'model_object': BernoulliNB(),
  'score on test data': 94.73

In [97]:
from sklearn.datasets import load_boston

In [98]:
data = pd.DataFrame(data = load_boston().data,columns = load_boston().feature_names)
data['price'] = load_boston().target
data.head(5)
from warnings import filterwarnings
filterwarnings('ignore')

In [99]:
data_pre_object = data_preprocessing(data)

In [100]:
data_pre_object.drop_null()

In [101]:
data_pre_object.out_in('price')

In [102]:
data_pre_object.split(0.2)

In [103]:
data_pre_object.standardize_or_normalize('Normalize')

In [104]:
from sklearn.linear_model import LinearRegression,Lasso,Ridge
from sklearn.ensemble import RandomForestRegressor

In [105]:
models = machine_learning_regression(data_pre_object)

In [106]:
models.evaluvate()

{'LinearRegression': {'model_object': LinearRegression(),
  'r2 score': 0.6118020073614188,
  'MAE': 4.258291450657327,
  'MSE': 31.55304926006149,
  'MAPE': 0.44593798219690534},
 'Ridge': {'model_object': Ridge(),
  'r2 score': 0.5862312682247668,
  'MAE': 4.13586085968485,
  'MSE': 29.230829688715502,
  'MAPE': 0.7564603791846841},
 'Lasso': {'model_object': Lasso(),
  'r2 score': -13.462740665215257,
  'MAE': 5.120079772025155,
  'MSE': 53.7785447350872,
  'MAPE': 0.22893222219685208},
 'DecisionTreeRegressor': {'model_object': DecisionTreeRegressor(),
  'r2 score': 0.7240057442204971,
  'MAE': 3.8372549019607844,
  'MSE': 24.255294117647058,
  'MAPE': 0.17343816097097448},
 'RandomForestRegressor': {'model_object': RandomForestRegressor(),
  'r2 score': 0.6322439957591062,
  'MAE': 4.423843137254901,
  'MSE': 30.71795594117648,
  'MAPE': 0.18108966001368573},
 'KNeighborsRegressor': {'model_object': KNeighborsRegressor(),
  'r2 score': 0.5523848171234557,
  'MAE': 3.09078431372549

LinearRegression Ridge Lasso RandomForestRegressor


In [45]:
a = [len]

In [46]:
a[0]([5,6])

2