# Create expert ranking from scrapped data

In [1]:
# export
import numpy as np
import pandas as pd
import json
from sklearn.preprocessing import MinMaxScaler as ScalerType
from sklearn.linear_model import Ridge as LinearModel
import sklearn

In [2]:
import pdb

## Parameters

In [3]:
batch_size = 50
n_epochs = 50
label_column = 'combined_ranking'
meta_file = 'movie_expert_meta'
input_file = 'movies'

In [4]:
data = pd.read_csv('Data/' + input_file + '.csv', sep = ',', index_col = 'id')

In [5]:
data.head()

Unnamed: 0_level_0,title,certificate,runtime,year,genre,last_update,imdb_rating,imdb_votes,rt_critic_rating,rt_critic_votes,rt_audience_rating,rt_audience_votes,mc_rating,mc_votes
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
3778644,Solo: A Star Wars Story,PG-13,135,2018,"Action, Adventure, Fantasy",10/06/2018,72,87482,60,50,65,32926,62,54
5463162,Deadpool 2,R,119,2018,"Action, Adventure, Comedy",10/06/2018,81,145439,67,46,86,24309,66,51
4154756,Avengers: Infinity War,PG-13,149,2018,"Action, Adventure, Fantasy",10/06/2018,88,368802,73,48,91,43608,68,53
1825683,Black Panther,PG-13,134,2018,"Action, Adventure, Sci-Fi",10/06/2018,75,326281,100,50,79,80444,88,55
6644200,A Quiet Place,PG-13,90,2018,"Drama, Horror, Sci-Fi",10/06/2018,79,121146,100,40,84,15271,82,55


In [6]:
expert_meta = json.load(open('Data/' + meta_file + '.json', 'r'))

In [7]:
expert_meta

{'names': ['imdb', 'rt_critic', 'rt_audience', 'mc'],
 'cols': ['_rating', '_votes']}

## Loading in the data

First the number of votes take a natural logirithm. We only care if one object has a mannitude of ratings above another item.

In [8]:
# export
def logNVotes(data, expert_meta):
    for name in expert_meta['names']:
        col_name = name + '_votes'
        data[col_name] = data[col_name].map(lambda x : np.log(x) if x > 0 else x)

In [9]:
logNVotes(data, expert_meta)

In [10]:
data.head()

Unnamed: 0_level_0,title,certificate,runtime,year,genre,last_update,imdb_rating,imdb_votes,rt_critic_rating,rt_critic_votes,rt_audience_rating,rt_audience_votes,mc_rating,mc_votes
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
3778644,Solo: A Star Wars Story,PG-13,135,2018,"Action, Adventure, Fantasy",10/06/2018,72,11.379188,60,3.912023,65,10.402018,62,3.988984
5463162,Deadpool 2,R,119,2018,"Action, Adventure, Comedy",10/06/2018,81,11.887512,67,3.828641,86,10.098602,66,3.931826
4154756,Avengers: Infinity War,PG-13,149,2018,"Action, Adventure, Fantasy",10/06/2018,88,12.818015,73,3.871201,91,10.682996,68,3.970292
1825683,Black Panther,PG-13,134,2018,"Action, Adventure, Sci-Fi",10/06/2018,75,12.695514,100,3.912023,79,11.295317,88,4.007333
6644200,A Quiet Place,PG-13,90,2018,"Drama, Horror, Sci-Fi",10/06/2018,79,11.704752,100,3.688879,84,9.633711,82,4.007333


Everything that is negative 1 is missing and it is set at a NaN value. Everything else is scaled.

In [11]:
# export
scaler = ScalerType()

def converSeriesToInput(series):
    return series.where(series > 0).values.reshape(-1, 1)

def normaliseData(data, expert_meta):
    for name in expert_meta['names']:
        for col in expert_meta['cols']:
            col_name = name + col
            data[col_name] = data[col_name].astype(np.float64)
            scaler.fit(converSeriesToInput(data[col_name]))
            transformedInputToFloat = lambda x : scaler.transform([[x]])[0][0] if (x > 0) else np.nan
            data[col_name] = data[col_name].map(transformedInputToFloat)

In [12]:
normaliseData(data, expert_meta)

In [13]:
data.head()

Unnamed: 0_level_0,title,certificate,runtime,year,genre,last_update,imdb_rating,imdb_votes,rt_critic_rating,rt_critic_votes,rt_audience_rating,rt_audience_votes,mc_rating,mc_votes
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
3778644,Solo: A Star Wars Story,PG-13,135,2018,"Action, Adventure, Fantasy",10/06/2018,0.233333,0.410446,0.565217,0.581096,0.484375,0.459186,0.616162,0.970273
5463162,Deadpool 2,R,119,2018,"Action, Adventure, Comedy",10/06/2018,0.533333,0.506826,0.641304,0.563874,0.8125,0.435715,0.656566,0.951086
4154756,Avengers: Infinity War,PG-13,149,2018,"Action, Adventure, Fantasy",10/06/2018,0.766667,0.683252,0.706522,0.572664,0.890625,0.480921,0.676768,0.963999
1825683,Black Panther,PG-13,134,2018,"Action, Adventure, Sci-Fi",10/06/2018,0.333333,0.660026,1.0,0.581096,0.703125,0.528287,0.878788,0.976433
6644200,A Quiet Place,PG-13,90,2018,"Drama, Horror, Sci-Fi",10/06/2018,0.466667,0.472174,1.0,0.535007,0.78125,0.399754,0.818182,0.976433


Initialise 

In [14]:
data[label_column] = 0.5

In [15]:
data = data.sort_index()

## Create expert object

First we create a function that will return a slice of the dataset from the main dataframe.

In [16]:
# export
def getDataIndexSlice(data : pd.DataFrame, i : int, batch_size: int):
    data_slice = data.iloc[i * batch_size : (i+1) * batch_size]
    return data_slice.to_dict(), data_slice.index

In [17]:
assert(len(getDataIndexSlice(data, 0, batch_size)[0][label_column]) == len(getDataIndexSlice(data, 0, batch_size)[1]))

In [18]:
# export
class Expert:
    
    def __init__(self, expert_name : str, col_suffixes : list, data : pd.DataFrame, 
            clf_type : sklearn.linear_model, batch_size: int, label_column: str, **kwargs):
        
        self.expert_name = expert_name
        self.data = data
        self.col_names = [expert_name + suf for suf in col_suffixes]
        self.label_col = label_column
        self.clf = clf_type(**kwargs)
        self.valid_index = self.createValidIndices(data)
        self.valid_len = len(self.valid_index)
        self.batch_size = batch_size
        self.n_batches = (self.valid_len + batch_size - 1) // batch_size
        
        self.data_view = None
        self.precision = 1.
        self.preds = np.ndarray(batch_size, dtype = np.float64)
        self.input_arr = np.ndarray(len(self.col_names), dtype = np.float64)
        self.preds.fill(np.nan)
        
        self.total_error = 0
        self.n_preds = 0
        self.current_index = 0
        self.compute_errors = True
        
    def resetCurrentIndex(self):
        self.current_index = 0
        self.total_error = 0.
        self.n_preds = 0
        
    ## Functions for getting training batches    
    
    def createValidIndices(self, data : pd.DataFrame):
        return data[np.all([data[col] > 0 for col in self.col_names], axis = 0)].index
    
    def getValidDataView(self):
        return np.array(self.data[self.col_names].loc[
                self.valid_index[self.current_index: np.min((self.current_index + self.batch_size, self.valid_len))]
        ])
    
    def getValidCombinedRating(self):
        return np.array(self.data[self.label_col].loc[
                self.valid_index[self.current_index: np.min((self.current_index + self.batch_size, self.valid_len))]
        ])
        
    def getBatch(self):
        train_x = self.getValidDataView()
        train_y = self.getValidCombinedRating()
        self.current_index += self.batch_size
        return train_x, train_y
        
    def trainBatch(self):
        train_x, train_y = self.getBatch()
        self.clf.fit(train_x, train_y)
        
    def trainAllData(self):
        self.resetCurrentIndex()
        for i in range(self.n_batches):
            self.trainBatch()
    
    ## Prediction functions for finding error and updating expert precision
    
    def getInputArrayFromDict(self, input_data : dict, input_index : pd.Index):
        self.input_arr[:] = [input_data[col][input_index] for col in self.col_names]
    
    def getPredictionErrorOnIndex(self, input_data : dict, input_indices : pd.Index, i : int):
        self.getInputArrayFromDict(input_data, input_indices[i])
        self.preds[i] = self.clf.predict(self.input_arr.reshape(1, -1))
        self.current_index += 1
        if self.compute_errors:
            self.total_error += (self.preds[i] - input_data[self.label_col][input_indices[i]])**2
            self.n_preds += 1
    
    def getPredictionForIndex(self, input_data : dict, input_indices : pd.Index, i : int):
        if not(self.current_index < self.valid_len):
            self.preds[i] = np.nan
        elif input_indices[i] != self.valid_index[self.current_index]:
            self.preds[i] = np.nan
        else:
            self.getPredictionErrorOnIndex(input_data, input_indices, i)
    
    def predictValues(self, input_data : dict, input_indices : pd.Index):
        n_input = len(input_indices)
        for i in range(n_input):
            self.getPredictionForIndex(input_data, input_indices, i)
        return self.preds[:n_input]
    
    def predictOnAllData(self, batches_in_data : int):
        self.resetCurrentIndex()
        for i in range(batches_in_data):
            input_data, input_indices = getDataIndexSlice(self.data, i, self.batch_size)
            self.predictValues(input_data, input_indices)
    
    def updatePrecision(self):
        if (self.n_preds == 0) or (self.total_error == 0):
            self.precision = 1.
        else:
            self.precision = 1/np.sqrt(self.total_error/self.n_preds)
        return self.total_error
            
    def __str__(self):
        return self.expert_name
    
    def __repr__(self):
        return self.expert_name
        

Create a stub `Expert` for testing called bert.

In [19]:
input_cols = ['_rating', '_votes']

In [20]:
bert = Expert('imdb', input_cols, data, LinearModel, batch_size, label_column,
               random_state = 0, max_iter = 1, tol = 0)

The expert can be queried to request the data for the indexes where the expert predicted. The number of valid indices should be be subset of the number of indices in the original dataset. A batch of valid predictions can be used for training. A valid entry must have all data fields (or `input_cols` as defined above) to have valid values to be suitable for training.

In [21]:
len(bert.valid_index)

3044

In [22]:
assert(len(bert.valid_index) < len(data))

In [23]:
assert(np.linalg.norm(np.array(bert.getValidDataView().shape) - (batch_size, len(input_cols))) < 1e-7)

In [24]:
assert(np.linalg.norm(np.array(bert.getValidCombinedRating().shape) - (batch_size)) < 1e-7)

In [25]:
bert.trainBatch()

In [26]:
params = np.concatenate((bert.clf.coef_, [bert.clf.intercept_]), axis = 0); params

array([0. , 0. , 0.5])

At this point the final prediciton is a constant value of 0, 0 for the weight parameters and a constant intercept of 0.5 is optimal since the target rating is a constant 0.5. Any model that doesn't predict this is either unsuitable or it has not been set-up properly.

In [27]:
assert(np.linalg.norm(params - np.array([0., 0., 0.5])) < 1e-3)

In [28]:
input_data_batch = data.loc[data.index[:50]].to_dict()

Update the input array of the 

In [29]:
bert.input_arr

array([2.12199579e-314, 6.36598737e-314])

In [30]:
bert.getInputArrayFromDict(input_data_batch, data.index[0])

In [31]:
bert.input_arr

array([0.56666667, 0.24106935])

In [32]:
assert(np.linalg.norm(bert.input_arr - (data.iloc[0]['imdb_rating'], data.iloc[0]['imdb_votes'])) < 1e-7)

In [33]:
bert.resetCurrentIndex()

In [34]:
stub_index = list(data.index[:batch_size]); stub_index.pop(0); stub_index.pop(-1);

In [35]:
bert.valid_index = stub_index
bert.valid_len = batch_size - 2

In [36]:
preds = bert.predictValues(input_data_batch, data.index[:50])

In [37]:
print(preds[:10], preds[-10:])

[nan 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5] [0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 nan]


In [38]:
assert(np.isnan(preds[0]))

In [39]:
assert(np.isnan(preds[-1]))

## Evaluate data according to experts

In [40]:
# export
class ExpertEnsemble:
    
    def __init__(self, experts : list, data : pd.DataFrame,  scaler : sklearn.preprocessing, 
                 batch_size: int, label_col: str):

        self.experts = experts
        self.data = data
        self.scaler = scaler
        self.label_col = label_col
        
        self.n_experts = len(self.experts)
        self.experts_intiliased = False
        
        self.output_prediction = np.ndarray(batch_size, dtype = np.float64)
        self.batch_predictions = np.ndarray((self.n_experts, batch_size), dtype = np.float64)
        self.batch_weights = np.ndarray((self.n_experts, batch_size), dtype = np.float64)
        self.valid_len = len(self.data.index)
        self.batch_size = batch_size
        self.n_batches = (self.valid_len + batch_size - 1) // batch_size
        self.total_error = 0.
        self.weight_vector = np.ndarray((self.n_experts, batch_size), dtype = np.float64)
        self.weight_vector.fill(1/self.n_experts)
    
    def updateWeightVector(self):
        self.weight_vector[:, :] = np.array(
            [[expert.precision for expert in self.experts] for i in range(self.batch_size)], 
            dtype = np.float64).transpose()
        sum_weight = np.sum(self.weight_vector)
        if sum_weight == 0: 
            self.weight_vector.fill(1/self.n_experts)
        else:
            self.weight_vector /= sum_weight
    
    def updatePrecisions(self):
        self.turnOnErrors()
        self.total_error = 0.
        for expert in self.experts:
            expert.predictOnAllData(self.n_batches)
            self.total_error += expert.updatePrecision()
        self.updateWeightVector()
    
    def trainExperts(self):
        for expert in self.experts:
            expert.trainAllData()
            
    def turnOnErrors(self):
        for expert in self.experts: expert.compute_errors = True
            
    def turnOffErrors(self):
        for expert in self.experts: expert.compute_errors = False
            
    def resetAllIndices(self):
        for expert in self.experts: expert.resetCurrentIndex()
    
    def computeOutputPrediction(self, n_input : int):
        self.output_prediction[:n_input] = np.nansum(
            self.weight_vector[:, :n_input] * self.batch_predictions[:, :n_input], axis = 0)
    
    def setBatchPredictions(self, input_data : dict, input_indices: pd.Index, i : int, n_input: int):
        if self.experts_intiliased:
            for j in range(self.n_experts):
                self.batch_predictions[j, :n_input] = self.experts[j].predictValues(input_data, input_indices)
        else:
            for j in range(self.n_experts):
                self.batch_predictions[j, :n_input] = list(input_data[self.experts[j].expert_name + '_rating'].values())
    
    def scaleOutput(self):
        scaler.fit(converSeriesToInput(self.data[self.label_col]))
        scaleLabelColumn = lambda x : scaler.transform([[x]])[0][0]
        self.data[self.label_col] = self.data[self.label_col].map(scaleLabelColumn)
    
    def updateAggregatedPrediction(self):
        self.turnOffErrors()
        self.resetAllIndices()
        for i in range(self.n_batches):
            input_data, input_indices = getDataIndexSlice(self.data, i, self.batch_size)
            n_input = len(input_indices)
            self.setBatchPredictions(input_data, input_indices, i, n_input)
            self.computeOutputPrediction(n_input)
            self.data.at[input_indices, self.label_col] = self.output_prediction[:n_input]
        self.scaleOutput()
    
    def iterativelyPredictRanking(self, n_epochs : int):
        self.updateAggregatedPrediction()
        self.experts_intiliased = True
        for i in range(1, n_epochs + 1):
            self.trainExperts()
            self.updateAggregatedPrediction()
            self.updatePrecisions()
            print('Epoch', str(i), 'completed with total error', self.total_error)
            
    def __str__(self):
        return str(self.experts)
    
    def __repr__(self):
        return str(self.experts)

## Create Iteration Algorithm

In [41]:
# export
def geneterateExperts(expert_meta, data, LinearModel, batch_size, label_column, **kwargs):
    return [Expert(name, expert_meta['cols'], data, LinearModel, batch_size, label_column, 
            **kwargs) for name in expert_meta['names']]

In [42]:
experts =  geneterateExperts(expert_meta, data, LinearModel, batch_size, label_column,
                             max_iter = 1, alpha = 1., fit_intercept = False)

In [43]:
kaggle_of_geese = ExpertEnsemble(experts, data, scaler, batch_size, label_column)

In [44]:
kaggle_of_geese.iterativelyPredictRanking(n_epochs)

Epoch 1 completed with total error 194.86090940117663
Epoch 2 completed with total error 250.02010558526075
Epoch 3 completed with total error 207.74764439092965
Epoch 4 completed with total error 240.47971824302655
Epoch 5 completed with total error 223.58126376852385
Epoch 6 completed with total error 236.16336779175447
Epoch 7 completed with total error 232.04011065758044
Epoch 8 completed with total error 236.46547063385606
Epoch 9 completed with total error 236.44493371678692
Epoch 10 completed with total error 238.3749125200712
Epoch 11 completed with total error 239.2773929605645
Epoch 12 completed with total error 240.51074856519315
Epoch 13 completed with total error 241.5056348616249
Epoch 14 completed with total error 242.5123500719916
Epoch 15 completed with total error 243.44299009356732
Epoch 16 completed with total error 244.3414248223528
Epoch 17 completed with total error 245.2000743074547
Epoch 18 completed with total error 246.0290795643834
Epoch 19 completed with to

## Test the Expert Ensemble

The error does not need to be minimised but we expect it to converge after sufficiently many epochs. The initial prediction is determined by every expert being equally weighted.

In [45]:
kaggle_of_geese

[imdb, rt_critic, rt_audience, mc]

Assert that the maximum is one.

In [46]:
assert(np.abs(np.max(data[label_column]) - 1) < 1e-5)

Show the highest rated film on the list. Sence check that it is good.

In [47]:
data.loc[data[label_column].idxmax()]

title                      The Dark Knight
certificate                          PG-13
runtime                                152
year                                  2008
genre                 Action, Crime, Drama
last_update                     10/06/2018
imdb_rating                       0.833333
imdb_votes                        0.997274
rt_critic_rating                  0.913043
rt_critic_votes                   0.589197
rt_audience_rating                  0.9375
rt_audience_votes                 0.769966
mc_rating                         0.818182
mc_votes                          0.861032
combined_ranking                         1
Name: 468569, dtype: object

Asset that the minimum is zero.

In [48]:
assert(np.abs(np.min(data[label_column])) < 1e-5)

Look at the worst film on the list. Sense check that it is gabbage.

In [49]:
data.loc[data[label_column].idxmin()]

title                                Chaos
certificate                              R
runtime                                106
year                                  2005
genre                 Action, Crime, Drama
last_update                     18/06/2018
imdb_rating                              0
imdb_votes                        0.293782
rt_critic_rating                       NaN
rt_critic_votes                        NaN
rt_audience_rating                0.359375
rt_audience_votes                 0.424877
mc_rating                                0
mc_votes                          0.368795
combined_ranking                         0
Name: 402910, dtype: object

Displays the entire list.

In [50]:
data.sort_values(by = [label_column], ascending = False)

Unnamed: 0_level_0,title,certificate,runtime,year,genre,last_update,imdb_rating,imdb_votes,rt_critic_rating,rt_critic_votes,rt_audience_rating,rt_audience_votes,mc_rating,mc_votes,combined_ranking
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
468569,The Dark Knight,PG-13,152,2008,"Action, Crime, Drama",10/06/2018,0.833333,0.997274,0.913043,0.589197,0.937500,0.769966,0.818182,0.861032,1.000000
111161,The Shawshank Redemption,R,142,1994,"Crime, Drama",10/06/2018,0.933333,1.000000,0.934783,0.345752,1.000000,0.713717,0.797980,0.636847,0.990207
167260,The Lord of the Rings: The Return of the King,PG-13,201,2003,"Adventure, Drama, Fantasy",12/06/2018,0.800000,0.936117,0.956522,0.576923,0.812500,0.997545,0.939394,0.877820,0.985407
68646,The Godfather,Not Rated,175,1972,"Crime, Drama",10/06/2018,0.900000,0.928163,0.945652,0.401918,1.000000,0.699042,1.000000,0.540275,0.968686
120737,The Lord of the Rings: The Fellowship of the Ring,PG-13,178,2001,"Adventure, Drama, Fantasy",10/06/2018,0.766667,0.938433,0.923913,0.563874,0.953125,0.746734,0.919192,0.814975,0.967541
110912,Pulp Fiction,R,154,1994,"Crime, Drama",10/06/2018,0.800000,0.953380,0.913043,0.446031,0.968750,0.732349,0.939394,0.698051,0.964439
167261,The Lord of the Rings: The Two Towers,PG-13,179,2002,"Adventure, Drama, Fantasy",12/06/2018,0.733333,0.917068,1.000000,0.568316,0.953125,0.745914,0.868687,0.861032,0.959725
1375666,Inception,PG-13,148,2010,"Action, Adventure, Sci-Fi",10/06/2018,0.766667,0.974808,0.836957,0.596992,0.890625,0.679514,0.737374,0.885909,0.948352
108052,Schindler's List,R,195,1993,"Biography, Drama, History",12/06/2018,0.800000,0.874556,1.000000,0.391841,0.984375,0.654332,0.929293,0.683764,0.929716
120815,Saving Private Ryan,R,169,1998,"Drama, War",10/06/2018,0.700000,0.878616,0.891304,0.529778,0.953125,0.722618,0.898990,0.824705,0.919341


We expect the rating and the number of people voting to be positively correlated with the overall score that is given. It is preferable that one predicter does not dominate the others. Experts are weighted by their precision.

In [51]:
for expert in experts: print(expert, expert.precision, expert.clf.coef_, expert.clf.intercept_)

imdb 10.150383498882704 [0.37834119 0.8553767 ] 0.0
rt_critic 6.055146644547952 [0.3834681  0.24964925] 0.0
rt_audience 7.243911124923989 [0.42448126 0.25060935] 0.0
mc 5.789519296555376 [0.32601684 0.26129987] 0.0


In [52]:
data[data['title'] == 'The Incredibles']

Unnamed: 0_level_0,title,certificate,runtime,year,genre,last_update,imdb_rating,imdb_votes,rt_critic_rating,rt_critic_votes,rt_audience_rating,rt_audience_votes,mc_rating,mc_votes,combined_ranking
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
317705,The Incredibles,PG,115,2004,"Animation, Action, Adventure",10/06/2018,0.5,0.757555,0.956522,0.568316,0.640625,0.992771,0.89899,0.87782,0.819962


## Save the CSV

In [53]:
#data.to_csv('Data/' + input_file + '_combined_ranking.csv', mode = 'w')

## Export

In [54]:
!python notebook2script.py GroupedPrediction.ipynb

Converted GroupedPrediction.ipynb to Lib\nb_GroupedPrediction.py
