In [248]:
import numpy as np 
import pandas as pd 
from sklearn.model_selection import train_test_split 
import matplotlib.pyplot as plt 
from sklearn.metrics import mean_squared_error
from sklearn.feature_selection import VarianceThreshold
from collections import Counter
from sklearn.linear_model import Ridge
  
# Ridge Regression 
  
class RidgeRegression:
    """Linear regression with an L2 (ridge) penalty, trained by batch gradient descent.

    The hypothesis is ``h(x) = x . W + b``. Every iteration moves ``W`` and
    ``b`` against the gradient of

        (1 / m) * (sum((Y - h(X)) ** 2) + l2_penality * sum(W ** 2))

    so the bias ``b`` is not penalised.

    Parameters
    ----------
    learning_rate : float
        Step size applied to each gradient update.
    iterations : int
        Number of full-batch gradient steps performed by ``fit``.
    l2_penality : float
        Strength of the ridge penalty on ``W`` (the parameter spelling is kept
        so existing keyword callers continue to work).

    Attributes set by ``fit``
    -------------------------
    m, n : int
        Number of training rows and feature columns.
    W : ndarray of shape (n, 1)
        Learned weights.
    b : float
        Learned bias.
    X, Y : ndarray
        Training features ``(m, n)`` and targets ``(m, 1)`` as float arrays.
    """

    def __init__(self, learning_rate, iterations, l2_penality):
        self.learning_rate = learning_rate
        self.iterations = iterations
        self.l2_penality = l2_penality

    def fit(self, X, Y):
        """Train the model on ``X`` / ``Y`` and return ``self``.

        Parameters
        ----------
        X : array-like of shape (m, n)
            Training features (ndarray, DataFrame or nested list).
        Y : array-like of shape (m,) or (m, 1)
            Training targets.

        Raises
        ------
        ValueError
            If ``X`` is not two-dimensional or ``Y`` does not provide exactly
            one target per row of ``X``.
        """
        # Work on plain float arrays so the arithmetic below is positional
        # rather than subject to pandas label alignment.
        features = np.asarray(X, dtype=float)
        if features.ndim != 2:
            raise ValueError(
                "X must be 2-dimensional (n_samples, n_features); got shape %s"
                % (features.shape,)
            )

        # Force a column vector: a 1-D target subtracted from the (m, 1)
        # predictions would otherwise broadcast to an (m, m) matrix.
        targets = np.asarray(Y, dtype=float).reshape(-1, 1)

        # no_of_training_examples, no_of_features
        self.m, self.n = features.shape
        if targets.shape[0] != self.m:
            raise ValueError(
                "Y must hold one target per row of X; got %d targets for %d rows"
                % (targets.shape[0], self.m)
            )

        # weight initialization
        self.W = np.zeros((self.n, 1))
        self.b = 0.0
        self.X = features
        self.Y = targets

        # gradient descent learning
        for _ in range(self.iterations):
            self.update_weights()
        return self

    def update_weights(self):
        """Perform one full-batch gradient-descent step on ``W`` and ``b``; return ``self``."""
        residual = self.Y - self.predict(self.X)

        # Gradients of the penalised mean squared error.
        dW = (-2 * self.X.T.dot(residual)
              + 2 * self.l2_penality * self.W) / self.m
        # float(...) keeps the bias a plain scalar.
        db = -2 * float(np.sum(residual)) / self.m

        # update weights
        self.W = self.W - self.learning_rate * dW
        self.b = self.b - self.learning_rate * db
        return self

    def predict(self, X):
        """Evaluate the hypothesis function ``h(x) = x . W + b``.

        ``X`` must provide a ``.dot`` method (e.g. an ndarray or DataFrame)
        with ``n`` columns. For an ndarray with ``k`` rows the result is an
        ndarray of shape ``(k, 1)``.
        """
        return X.dot(self.W) + self.b

def feature_selection(train_instances):
    """Fit a default-configured ``VarianceThreshold`` and report what it keeps.

    Prints a start banner, the number of retained columns, the number of
    dropped columns, and the raw ``Counter`` built from the selector's
    support mask.

    Parameters
    ----------
    train_instances : array-like
        Training features handed to ``VarianceThreshold.fit``.

    Returns
    -------
    VarianceThreshold
        The fitted selector, ready for ``transform``.
    """
    # NOTE(review): the banner says "Crossvalidation" although this step only
    # performs feature selection; the text is kept unchanged so output stays stable.
    print('Crossvalidation started... ')

    selector = VarianceThreshold().fit(train_instances)

    # Tally kept (True) / dropped (False) entries of the support mask once.
    support_counts = Counter(selector.get_support())
    print('Number of features used... ' + str(support_counts[True]))
    print('Number of features ignored... ' + str(support_counts[False]))
    print(support_counts)
    return selector
    
def main(train_path="./Dataset/Training/Features_Variant_1.csv",
         test_path="./Dataset/Testing/TestSet/Test_Case_1.csv"):
    """Train the ridge model on the training CSV and report its MSE on the test CSV.

    In both files the last column is used as the target and every other
    column as a feature. The variance-based selector is fitted on the
    training features only and then applied to both splits.

    Parameters
    ----------
    train_path : str
        Location of the training CSV.
    test_path : str
        Location of the test CSV.
    """
    # NOTE(review): read_csv treats the first row as a header by default; if
    # these CSV files carry no header row, pass header=None — confirm.
    df = pd.read_csv(train_path)
    # Plain ndarrays keep the selector and the hand-written model free of
    # column-name checks and pandas label alignment.
    X_train = df.iloc[:, :-1].to_numpy()
    Y_train = df.iloc[:, -1:].to_numpy()

    fs = feature_selection(X_train)
    X_train = fs.transform(X_train)

    test = pd.read_csv(test_path)
    # Apply the selector fitted on the training data to the test features as
    # well, so both splits share the same columns as the learned weights.
    X_test = fs.transform(test.iloc[:, :-1].to_numpy())
    Y_test = test.iloc[:, -1:]

    # Model training
    model = RidgeRegression(iterations=1000,
                            learning_rate=0.01, l2_penality=1)
    model.fit(X_train, Y_train)

    # Prediction on test set
    Y_pred = model.predict(X_test)
    print( "Predicted values ", np.round( Y_pred, 2 ) )
    print( "Real values      ", Y_test )

    mse = mean_squared_error(Y_test, Y_pred)
    print("MSE: ",mse)

In [249]:
# Run the pipeline: load the CSVs, train the ridge model and print predictions and test MSE.
main()

Predicted values  [[ 8.7400e+00]
 [-5.5000e-01]
 [-8.0000e-01]
 [-1.3600e+00]
 [ 2.0020e+02]
 [ 8.5100e+00]
 [ 3.2700e+00]
 [-1.1370e+01]
 [-3.7500e+00]
 [-7.6400e+00]
 [ 2.9660e+01]
 [ 1.1660e+01]
 [ 1.8540e+01]
 [ 3.7000e+00]
 [ 1.4980e+01]
 [ 3.9570e+02]
 [ 6.0500e+00]
 [ 1.5360e+01]
 [ 3.4000e-01]
 [-8.5400e+00]
 [ 1.3120e+01]
 [ 9.0400e+00]
 [ 9.7000e+00]
 [-5.8100e+00]
 [ 1.3120e+01]
 [ 2.8200e+00]
 [ 9.3300e+00]
 [-7.2000e-01]
 [ 3.7300e+00]
 [ 1.0380e+01]
 [-1.1210e+01]
 [ 4.8100e+00]
 [ 7.8510e+01]
 [-5.2900e+00]
 [ 7.9200e+00]
 [ 8.0100e+00]
 [ 2.2000e+00]
 [ 2.7700e+00]
 [-2.4000e-01]
 [ 2.7590e+01]
 [ 9.9200e+00]
 [ 1.3780e+01]
 [ 8.9600e+00]
 [ 5.8370e+01]
 [-2.3800e+00]
 [ 6.8600e+00]
 [ 1.4927e+02]
 [-2.3500e+00]
 [ 2.0870e+01]
 [-4.3660e+01]
 [-2.4870e+01]
 [ 2.7520e+01]
 [ 6.5500e+00]
 [ 2.3110e+01]
 [ 5.7000e-01]
 [ 4.1300e+00]
 [-2.3000e+00]
 [-2.3960e+01]
 [ 9.6410e+01]
 [-2.2100e+01]
 [ 1.3000e+00]
 [-1.2840e+01]
 [-1.5600e+00]
 [-3.4800e+00]
 [ 9.0100e+00]
 [ 6.06

  overwrite_a=True).T
