In [1]:
import math
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import linear_model

matplotlib.rcParams.update({'figure.figsize': (15, 9)})
matplotlib.rcParams.update({'font.size': 16})
matplotlib.rcParams.update({'axes.labelsize': 20})
matplotlib.rcParams.update({'xtick.labelsize': 12})
matplotlib.rcParams.update({'ytick.labelsize': 12})
matplotlib.rcParams.update({'font.family': 'Helvetica, Arial, sans-serif'})
%config InlineBackend.figure_format = 'retina'

sns.set_style("whitegrid")

# Machine Learning lab 4: extending logistic regression
## Jake Rowland and Paul Herz
2017-10-01

## 1. Introduction

Here we return to the dataset of our initial project, [Exploring Table Data](https://github.com/SMU-ML-2017/Project1/blob/master/ML%20Lab%201.ipynb), wherein we graphically analyzed the trends in the [IMDB Top 5000 Movie Dataset](https://www.kaggle.com/deepmatrix/imdb-5000-movie-dataset). One aspect of our analysis in that project centered around dividing the set of films into three groups: **poor**, **average**, and **good**. This was a simplification of IMDB's 10-point rating system, and grouped films into asymmetric quantiles. It allowed us to cut arbitrary lines in the large group of films to illustrate trends among the upper percentile films versus the rest. Here, we will try to use several classification techniques to predict these three classes, and compare the results. One technique will involve a handspun implementation to display the inner workings of such an algorithm.

### 1.1 Background

*Explain the task and what business-case or use-case it is designed to solve (or designed to investigate). Detail exactly what the classification task is and what parties would be interested in the results.*

This dataset represents data compiled entirely by IMDB, whose primary purpose is as a compendium of films. In the previous use of this dataset, we mostly considered how this dataset serves as a representation of trends in American and international cinema. This time, we want to consider the films more in the context of IMDB as an application, and the trends of American film consumers.

### 1.2 Business Case

Using the same arbitrary classifications of **poor**, **average**, and **good**, whose definitions depend on the quantile groupings of IMDB out-of-10 scores, we want to use film data (less the scores) to predict a film's classification into one of these groups. We believe that such a feature will be useful to IMDB, which is likely to receive a sizeable portion of traffic from visitors looking up brand-new films. However, brand-new films do not have sufficient reviews for IMDB to formulate their composite out-of-ten score, and must display a placeholder until there is consensus among a statistically significant number of film critics. Instead of leaving users dissatisfied when they cannot quickly determine whether they should see the film, or how it is performing, IMDB can classify the movie as "probably bad," "probably average," or "probably good." This is not as a replacement to critic composites, but to fill in the blank before reviews come in.

### 1.3 Serviceability

TODO

## 2. The dataset: preprocessing and review

### 2.1 Dataset preparation

TODO

### 2.2 Data quality

TODO

## 3. Something



In [2]:
import pandas as pd

m = pd.read_csv('movie_metadata.csv')

# Reorder the DataFrame to a more intelligent fashion
m = m[[
    'movie_title','title_year',
    'genres', 'plot_keywords', 'duration',
    'budget', 'gross',
    'language', 'country', 'content_rating',
    'color', 'aspect_ratio',
    'facenumber_in_poster',
    'director_name',
    'actor_1_name', 'actor_2_name', 'actor_3_name',
    'movie_facebook_likes', 'director_facebook_likes', 'actor_1_facebook_likes', 'actor_2_facebook_likes',
    'actor_3_facebook_likes', 'cast_total_facebook_likes',
    'movie_imdb_link', 'num_user_for_reviews', 'num_critic_for_reviews', 'num_voted_users',
    'imdb_score',
]]

# Reduce the number of float64 data types for columns that do not need a float64 data type
for col in ['title_year','facenumber_in_poster',
'movie_facebook_likes','actor_1_facebook_likes','actor_2_facebook_likes',
'actor_3_facebook_likes','cast_total_facebook_likes','num_user_for_reviews',
'num_critic_for_reviews','num_voted_users']:
    m[col] = pd.to_numeric(m[col],downcast='integer')
  
# Remove all duplicate entries
m.drop_duplicates(inplace=True)

# Create a copy to perserve the original DataFrame
m_original=m.copy()
m.head()

Unnamed: 0,movie_title,title_year,genres,plot_keywords,duration,budget,gross,language,country,content_rating,...,director_facebook_likes,actor_1_facebook_likes,actor_2_facebook_likes,actor_3_facebook_likes,cast_total_facebook_likes,movie_imdb_link,num_user_for_reviews,num_critic_for_reviews,num_voted_users,imdb_score
0,Avatar,2009.0,Action|Adventure|Fantasy|Sci-Fi,avatar|future|marine|native|paraplegic,178.0,237000000.0,760505847.0,English,USA,PG-13,...,0.0,1000.0,936.0,855.0,4834,http://www.imdb.com/title/tt0499549/?ref_=fn_t...,3054.0,723.0,886204,7.9
1,Pirates of the Caribbean: At World's End,2007.0,Action|Adventure|Fantasy,goddess|marriage ceremony|marriage proposal|pi...,169.0,300000000.0,309404152.0,English,USA,PG-13,...,563.0,40000.0,5000.0,1000.0,48350,http://www.imdb.com/title/tt0449088/?ref_=fn_t...,1238.0,302.0,471220,7.1
2,Spectre,2015.0,Action|Adventure|Thriller,bomb|espionage|sequel|spy|terrorist,148.0,245000000.0,200074175.0,English,UK,PG-13,...,0.0,11000.0,393.0,161.0,11700,http://www.imdb.com/title/tt2379713/?ref_=fn_t...,994.0,602.0,275868,6.8
3,The Dark Knight Rises,2012.0,Action|Thriller,deception|imprisonment|lawlessness|police offi...,164.0,250000000.0,448130642.0,English,USA,PG-13,...,22000.0,27000.0,23000.0,23000.0,106759,http://www.imdb.com/title/tt1345836/?ref_=fn_t...,2701.0,813.0,1144337,8.5
4,Star Wars: Episode VII - The Force Awakens ...,,Documentary,,,,,,,,...,131.0,131.0,12.0,,143,http://www.imdb.com/title/tt5289954/?ref_=fn_t...,,,8,7.1


In [3]:
# Remove foreign films to solve the normalization problem
m = m[m['country'] == 'USA']

# Remove items with non-American or non-film rating systems
m = m[m['content_rating'].isin(['R','PG-13','PG','G'])]

In [4]:
# Categorize the IMDB score into three classes:
# [0%-49%] is Poor, [50%-89%] is Average, [90%-100%] is Good.
poor_avg = m['imdb_score'].quantile(.5)
avg_good = m['imdb_score'].quantile(.9)
m['rating_category'] = pd.cut(m.imdb_score,[0,poor_avg,avg_good,10],labels=['poor','average','good'])

In [5]:
for c in ['movie_title','plot_keywords','actor_1_name','actor_2_name','actor_3_name',
'movie_imdb_link','genres', 'director_name','imdb_score','aspect_ratio','country','language']:
    m.drop(c, axis=1, inplace=True)

In [6]:
# Convert categorical values to category type
for col in ['content_rating','color']:
    m[col] = m[col].astype('category')

In [7]:
# Remove rows where the value is null
for col in ['title_year', 'language','country','content_rating',
'aspect_ratio','duration', 'color','gross','budget','movie_facebook_likes',
'actor_1_facebook_likes','actor_2_facebook_likes','actor_3_facebook_likes',
'cast_total_facebook_likes']:
    try:
        m = m[pd.notnull(m[col])]
    except KeyError:
        pass

In [8]:
# Assume null review counts are 0
for col in ['num_user_for_reviews','num_critic_for_reviews']:
    m[col].fillna(value=0,inplace=True)

In [9]:
# Assume missing face counts are the mean
avgFace = round(m['facenumber_in_poster'].mean())
m['facenumber_in_poster'].fillna(value=avgFace, inplace=True)

In [10]:
# replace color and content_rating with dummies
m = pd.get_dummies(m, columns=['color','content_rating'])
m.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2963 entries, 0 to 5042
Data columns (total 21 columns):
title_year                   2963 non-null float64
duration                     2963 non-null float64
budget                       2963 non-null float64
gross                        2963 non-null float64
facenumber_in_poster         2963 non-null float64
movie_facebook_likes         2963 non-null int32
director_facebook_likes      2963 non-null float64
actor_1_facebook_likes       2963 non-null float64
actor_2_facebook_likes       2963 non-null float64
actor_3_facebook_likes       2963 non-null float64
cast_total_facebook_likes    2963 non-null int32
num_user_for_reviews         2963 non-null float64
num_critic_for_reviews       2963 non-null float64
num_voted_users              2963 non-null int32
rating_category              2963 non-null category
color_ Black and White       2963 non-null uint8
color_Color                  2963 non-null uint8
content_rating_G             2963 n

In [11]:
if m.rating_category.dtype != np.dtype('int8'):
    m.rating_category = m.rating_category.cat.codes
m.rating_category.value_counts()

0    1456
1    1231
2     276
Name: rating_category, dtype: int64

In [12]:
X=m.drop('rating_category', axis=1, inplace=False)
y=np.ravel(m['rating_category'])

split_index = int(len(m)*0.8)
X_train = X[:split_index]
X_predict = X[split_index:]
y_train = y[:split_index]
y_predict = y[split_index:]

type(y_train)

numpy.ndarray

In [13]:
logistic = linear_model.LogisticRegression(multi_class='ovr')
logistic.fit(X_train,y_train)
C = logistic.predict(X_predict)
print('Predicted', pd.Series(C).value_counts())
print('Actual', pd.Series(y_predict).value_counts())

from sklearn.metrics import confusion_matrix



m = confusion_matrix(C,y_predict)
print(m)

logistic.coef_

#  Default
# [[130 156  38]
#  [ 85  98  10]
#  [ 43  31   2]]

Predicted 1    369
0    200
2     24
dtype: int64
Actual 1    285
0    258
2     50
dtype: int64
[[139  60   1]
 [119 220  30]
 [  0   5  19]]


array([[  2.70670632e-06,   9.29206545e-08,   1.94725562e-08,
          4.26883064e-09,   3.03963961e-09,  -7.88845810e-06,
         -1.67991398e-06,   1.22319154e-06,   2.05359998e-07,
          7.82966274e-07,   3.77450250e-06,   1.93695656e-07,
          7.69562936e-08,  -1.52688185e-05,  -6.03054113e-11,
          1.40916377e-09,  -5.27423771e-11,  -7.76609739e-11,
          1.10956127e-09,   3.69700441e-10],
       [ -2.87471917e-06,  -1.04729004e-07,  -6.69589931e-09,
          2.53262363e-09,  -2.91174761e-09,   5.04803943e-06,
          2.30343394e-07,   3.44911795e-06,   3.21321331e-07,
         -6.45138288e-07,   1.41261685e-06,  -2.55820801e-08,
          5.51190949e-08,  -1.04373501e-06,   4.22430355e-11,
         -1.47854414e-09,  -4.18235738e-11,  -2.87056494e-10,
         -1.02639123e-09,  -8.10298036e-11],
       [ -1.43173340e-03,  -6.47588820e-05,  -1.81908834e-08,
         -4.42616069e-09,  -1.24294533e-06,   7.06268752e-06,
          3.56130656e-05,   7.91656450e-05

In [28]:
from scipy.special import expit

class Regression:

    def __init__(self, eta, maxIter=20, C=0.001, opt='GD', reg ='NA'):
        self.eta = eta
        self.maxIter = maxIter
        self.C = C
        self.optimization = opt
        self.regularization = reg
        
    # convenience, private:
    @staticmethod
    def _add_bias(X):
        return np.hstack((np.ones((X.shape[0],1)),X)) # add bias term
    
    @staticmethod
    def _sigmoid(theta):
        # increase stability, redefine sigmoid operation
        return expit(theta) #1/(1+np.exp(-theta))
    
    # vectorized gradient calculation with regularization using L2 Norm
    def _get_gradient(self,X,y):
        # ydiff = y(m,1) - yHat(m,1)
        # The ravel of ydiff is 1-D array of size m
        ydiff = y-self.predict_proba(X,add_bias=False).ravel() # get y difference
        # X is (m,n+1)
        # ydiff is 1-D array of size m. ydiff[:,np.newaxis] is matrix of (m,1)
        # X * ydiff[:,np.newaxis] is matrix (m,n+1) where each row is X[m] @ ydiff
        # Calculate the horizontal average for each row. Result is 1-D array of size m
        gradient = np.mean(X * ydiff[:,np.newaxis], axis=0) # make ydiff a column vector and multiply through
        
        # Reshape the gradient to be matrix (m,1)
        gradient = gradient.reshape(self.w_.shape)
        
        if(self.regularization == 'L1'):
            # L1 = C * sum(|w|)
            gradient[1:] += self.C * np.sum(abs(self.w_[1:])) 
        elif(self.regularization == 'L2'):
            # L2 = C * sum(|w|^2)
            gradient[1:] += np.sum(self.w_[1:]*self.w_[1:]) * self.C
            #gradient[1:] += -2 * self.w_[1:] * self.C
        elif(self.regularization == 'L1/L2'):
            # DOES NOT WORK
            #Calculate the L1 and L2 regularization
            L1 = np.sum(abs(self.w_[1:]))
            L2 = self.w_[1:]*self.w_[1:]
            # Mutlipy the quotient of L1 and L2 by regularization rate
            gradient[1:] += np.divide(L1,L2) * self.C
        
        return gradient
    
    def _gradient_descent(self):
        # for as many as the max iterations
        for _ in range(self.maxIter):
            # Get the gradient of form (m,1)
            gradient = self._get_gradient(self.Xb,self.y)
            # Add the associated gradient to the weight to adjust the weights.
            self.w_ += gradient*self.eta # multiply by learning rate
            
            
    def _stocastic_descent(self):
        for _ in range(self.maxIter):
            for m in range(1,self.num_samples):
                # Calulate the weights for 1st instance, then first 2, then first 3..., then first m instances
                gradient = self._get_gradient(self.Xb[:m], self.y[:m])
                self.w_ += gradient*self.eta # multipy by learning rate
    
            
    def _newton_H(self,X,y):
        # Get P(y=1|x,w_)
        hX = self.predict_proba(X,add_bias=False)
        # get P(y=0|x,w_)
        one_hX = 1 - hX
        
        # Multiply together
        prob_Mult = hX*one_hX
        
        # Create matrix to perform average on
        H = np.zeros((X.shape[1], X.shape[1]))
        for row in range(X.shape[0]):
            xi = X[row] # Get the row of X
            xi.shape = (X.shape[1],1) #Convert to 2D array
            # Get the transpose of the row
            xiT = xi.T
            
            # Create a matrix from x and xT
            xMat = xi*xiT
            
            # Scalar multipy the probibility multiplyer 
            # and the xMatrix
            xMat = prob_Mult[row] * xMat
            
            # Add the matrix to accumulation matrix
            H = H + xMat
            
        # Divide the accumulation matrix by the number of samples to get average
        H = H / X.shape[0]
        
        return H
    
        
    def _newton_method(self):
        for _ in range(self.maxIter):
            # Get the hessian matrix (Second derivative)
            H = self._newton_H(self.Xb, self.y)
            # Get the gradient (First Derivative)
            gradient = self._get_gradient(self.Xb, self.y)
            try:
                # Attempt to inverse the hessian
                H_inv = np.linalg.inv(H)
            
                # Modify the w_(t+1) = w_(t) - (dJ(w_))/(d^2J(w_))
                self.w_ -= (H_inv @ gradient)
            except:
                print('Singularity Matrix')
    
    # public:
    # Takes (m,n) matrix - X. Add bias term to X matrix to create (m,n+1) matrix - Xb
    # Calculate the dot product of Xb(m,n+1) and w_(n+1,1) -> result is matrix of (m,1)
    # Calculate the sigmoid for each value of the dot product and return a matrix of (m,1)
    def predict_proba(self,X,add_bias=True):
        # add bias term if requested
        Xb = self._add_bias(X) if add_bias else X
        return self._sigmoid(Xb @ self.w_) # return the probability y=1
    
    def predict(self,X,prob=.5):
        return (self.predict_proba(X) >= prob)
    
    def predict_raw(self,X):
        return (self.predict_proba(X))
        
    def fit(self, X, y):
        self.Xb = self._add_bias(X) # add bias term
        self.num_samples, self.num_features = self.Xb.shape
        self.y = y
        
        self.w_ = np.zeros((self.num_features,1)) # init weight vector to zeros
        
        # Select the optimization process
        if(self.optimization == 'GD'):
            self._gradient_descent()
        elif (self.optimization == 'SGD'):
            self._stocastic_descent()
        elif (self.optimization == 'NWT'):
            self._newton_method()
        else:
            self._gradient_descent()

In [29]:
class MultiClassRegression:
    
    def __init__(self, eta, maxIter=20, C=0.001, opt='GD', reg ='NA'):
        self.eta = eta
        self.maxIter = maxIter
        self.C = C
        self.optimization = opt
        self.regularization = reg
        
    def fit(self, X, y):
        self.X = X
        self.true_y = y
        
        # Find unique classes
        self.classes = np.unique(y).tolist()
        
        #Create dictonary to hold regression class associated with the class as the key
        self.reg = {}
        
        # Define training classes as matrix of (n_class, n_samples)
        self.y_classes = np.zeros((len(self.classes),X.shape[0]))
        # Define weights as matrix of (n_class, n_features)
        self.w_ = np.zeros((len(self.classes),(X.shape[1]+1)))
        
        # For every class
        for cur_class in self.classes:
            # Get the indexes of all classes that are positive
            idx = y == cur_class
            
            # Define the positive classes as 1 in a zeroed array
            self.y_classes[cur_class][idx] = 1
            
            # Find the index of the class (for non numerical classes)
            idx_class = self.classes.index(cur_class)
            
            # Create a binary logistic classifier with the parameters passed to the multiclass logistic classifier
            self.reg[idx_class] = Regression(self.eta, self.maxIter, self.C, self.optimization, self.regularization)
            # Fit all the binary logistic classifiers with the training data and their associated training classes
            self.reg[idx_class].fit(X,self.y_classes[cur_class])
            
            # Copy weights from the binary to the associated location in the multiclass logistic classifer
            self.w_[idx_class] = self.reg[idx_class].w_.ravel()
            
            
    def predict(self, X):
        # Create a prediction matrix of size (n_classes, n_samples)
        prediction = np.zeros((len(self.classes),len(X)))
        
        # For each binary logistic classifire
        for key, reg in self.reg.items():
            # Get index of key (for non numerical classes)
            idx = self.classes.index(key)
            # Predict the raw probability for each sample and make it a 1D array
            prediction[key] = reg.predict_raw(X).ravel()
            
        # Return the row index that has the maximum prediction value
        return np.argmax(prediction, axis = 0)
    
    def predict_raw(self, X):
        # Create a prediction matrix of size (n_classes, n_samples)
        prediction = np.zeros((len(self.classes),len(X)))
        
        # For each binary logistic classifire
        for key, reg in self.reg.items():
            # Get index of key (for non numerical classes)
            idx = self.classes.index(key)
            # Predict the raw probability for each sample and make it a 1D array
            prediction[key] = reg.predict_raw(X).ravel()
            
        # Return raw prediciton values for each class against each sample
        return prediction
        

In [36]:
#Do not remove. This is our best estimation so far
multiR_one = MultiClassRegression(eta=1, maxIter=500, C=0.1, opt='GD', reg='NA')
multiR_two = MultiClassRegression(eta=1, maxIter=10, C=1, opt='NWT', reg='NA')
multiR_three = MultiClassRegression(eta=0.01, maxIter=5, C=0.0001, opt='NWT', reg='L2')

In [37]:
#Normalize the data to better fit a gradient curve
X_predict_norm = X_predict
X_train_norm = X_train
for col in X_train_norm.columns.values:
    mx = X_train_norm[col].max()
    X_train_norm[col] = X_train_norm[col]/mx
    X_predict_norm[col] = X_predict_norm[col]/mx
    
#NOT USED CURRENTLY

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys


In [38]:
multiR_one.fit(X_train_norm, y_train)
pred_one = multiR_one.predict(X_predict_norm)

print(pred_one)

m = confusion_matrix(pred_one,y_predict)
print(m)

[0 1 0 0 0 0 0 0 0 1 2 1 0 1 0 1 0 0 0 0 0 1 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0
 1 0 0 0 1 0 0 0 0 0 0 0 0 1 1 0 0 0 0 1 0 1 2 0 0 1 0 0 0 1 0 0 0 0 0 1 0
 0 1 1 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0
 0 0 0 1 0 0 1 0 0 2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 1 0 2 0 0 0 0 0 0 0 1 1 0 0 1 0 1 1 0 1 1 1 0 2 1 1 0 0 0 0 0 0 0
 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
 0 0 1 0 0 1 1 1 0 0 1 0 2 0 2 0 0 1 0 0 1 1 1 1 0 0 0 1 0 1 1 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 1 0 0
 1 0 0 1 0 1 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1
 0 1 0 0 0 0 1 1 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 1 0 1 0 0 0 1 0 0 1 0 0 0 0
 0 1 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 0 0 1 1 0 0 0 1 0 0 0 0
 0 1 0 0 1 0 0 1 0 0 0 0 0 1 1 0 0 0 0 0 1 0 0 1 0 0 0 0 0 2 0 0 0 0 0 1 0
 1 0 1 0 0 1 0 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 1 0 1 0 0 0 0 0 0 0 0 0 

In [39]:
multiR_two.fit(X_train_norm, y_train)
pred_two = multiR_two.predict(X_predict_norm)

print(pred_two)

m = confusion_matrix(pred_two,y_predict)
print(m)


Singularity Matrix
Singularity Matrix
Singularity Matrix
Singularity Matrix
Singularity Matrix
Singularity Matrix
Singularity Matrix
Singularity Matrix
Singularity Matrix
Singularity Matrix
Singularity Matrix
Singularity Matrix
Singularity Matrix
Singularity Matrix
Singularity Matrix
Singularity Matrix
Singularity Matrix
Singularity Matrix
Singularity Matrix
Singularity Matrix
Singularity Matrix
Singularity Matrix
[1 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 1 1 0 1 1 0 0 2 1 0 1 1 0 1 1
 1 1 1 0 0 0 1 1 1 1 1 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 1 0 0 1 0 0 0 1 1 1 1
 0 1 0 0 0 1 0 0 1 1 0 0 1 0 1 1 1 1 1 0 0 1 1 1 0 0 0 0 0 0 0 0 1 1 1 1 2
 2 0 0 0 0 1 0 0 1 0 0 0 0 1 0 0 1 0 0 0 1 1 0 1 1 1 1 0 1 0 1 0 1 1 1 0 1
 1 0 0 1 0 1 0 0 0 0 1 1 1 1 1 0 1 1 1 0 1 1 0 1 1 0 1 0 1 1 1 2 0 0 1 1 1
 1 1 0 0 0 1 0 0 0 0 0 0 0 1 1 0 1 1 1 1 1 1 0 0 1 0 1 1 0 0 1 1 1 1 1 0 0
 0 1 1 1 1 0 1 0 0 0 2 0 1 1 0 0 1 0 1 1 1 0 0 0 1 2 1 1 0 0 0 0 1 0 2 1 0
 0 1 0 0 1 1 1 0 0 0 1 1 0 0 0 0 1 2 0 0 1 1 0 0 0 0 1 1 

In [40]:
multiR_three.fit(X_train_norm, y_train)
pred_three = multiR_three.predict(X_predict_norm)

print(pred_three)

m_three = confusion_matrix(pred_three,y_predict)
print(m_three)

Singularity Matrix
Singularity Matrix
Singularity Matrix
Singularity Matrix
Singularity Matrix
Singularity Matrix
Singularity Matrix
Singularity Matrix
Singularity Matrix
[0 2 2 0 2 0 0 2 2 0 0 2 0 2 2 2 2 0 2 0 0 0 0 0 2 0 0 0 2 0 0 0 0 0 0 0 2
 0 2 0 2 2 0 0 0 0 0 0 2 0 2 0 2 0 0 0 0 0 2 2 2 2 2 2 2 0 2 2 2 0 0 2 0 0
 0 0 2 0 2 0 2 2 2 0 2 0 0 2 0 0 0 0 0 0 0 0 0 0 2 0 0 2 0 2 2 0 2 0 2 0 0
 0 2 2 2 2 2 2 0 0 0 2 0 2 0 2 0 0 2 2 2 0 0 0 0 0 0 0 2 0 2 0 2 0 0 0 0 0
 0 2 2 0 2 0 0 0 2 2 0 0 0 0 0 2 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 2 2 2 0 2 0
 0 0 2 2 2 2 2 0 2 2 2 2 2 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 2 2 0 0 0 0 0 2 0
 0 0 0 0 0 2 0 2 2 2 2 2 0 0 0 2 0 2 0 0 0 0 2 2 0 2 0 0 2 2 2 0 0 0 0 0 0
 0 0 0 2 0 0 0 0 0 2 0 0 0 2 0 2 0 0 2 2 0 2 2 2 2 2 2 0 0 0 0 0 0 0 2 0 0
 0 2 2 2 2 0 2 2 2 2 2 2 0 0 0 2 2 2 0 2 2 2 2 2 0 2 0 0 0 0 0 0 0 0 0 2 2
 0 0 0 0 2 2 2 0 2 2 2 0 0 0 0 0 2 0 2 0 2 2 0 0 2 0 0 0 0 0 0 0 2 2 0 0 0
 2 2 2 0 0 0 2 0 2 0 0 2 0 0 2 2 0 0 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 0 2 0 0
 0 2

In [35]:
multiR_three.w_

array([[ -6.15897934e+39,   6.57544325e+29,  -1.16533550e+28,
         -1.37026175e+26,  -8.20796552e+27,  -3.57266759e+27,
         -1.10843915e+28,   1.54019168e+27,  -2.06214110e+30,
         -4.48954012e+29,  -1.07156152e+29,   2.10295374e+30,
          1.15803799e+28,  -9.99531237e+27,   3.21051096e+27,
          5.65759085e+39,   5.65759085e+39,   5.01388493e+38,
          5.01388493e+38,   5.01388493e+38,   5.01388493e+38],
       [ -6.00610322e+40,   2.64093005e+30,   5.48922988e+28,
         -4.21708410e+27,  -3.56063312e+28,  -2.63169492e+28,
         -3.00824351e+26,   1.27186579e+28,  -8.26453587e+30,
         -1.73566991e+30,  -4.53611744e+29,   8.36368926e+30,
         -3.23727480e+27,  -5.74983452e+28,   7.87750701e+28,
          5.43325724e+40,   5.43325724e+40,   5.72845979e+39,
          5.72845979e+39,   5.72845979e+39,   5.72845979e+39],
       [ -1.38840436e+43,  -1.34689381e+33,   1.77711742e+31,
          2.09323282e+29,   4.27720185e+31,   1.01006360e+31,
      