# Machine Learning Project: Predicting a film’s gross revenue

In [None]:
import plotly.express as px
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from pandas.plotting import scatter_matrix
import seaborn as sns


X1 = pd.read_csv("X1.csv" )
Y1 = pd.read_csv("Y1.csv" , header=None , names =["revenue"])


def remove_prefix(text, prefix):
    return text[text.startswith(prefix) and len(prefix):]

def genre_individual(dataset):
    for i in range (len(dataset["genres"])):
        genres_here = dataset["genres"][i].split(",")
        for j in range(len(genres_here)):
            name = "genre"+str(j+1)
            dataset.loc[i, name]=genres_here[j]

def directorAndActors(dataset):
    actorsList = []
    directorList = []
    for i in range (len(dataset["description"])):
        print("\r"+str(i), end= "")
        step1= remove_prefix(dataset["description"][i], dataset["title"][i] + ": ")
        step2= step1.split('. ')
        if ("Directed " in step2[0]):
            step3= remove_prefix(step2[0], "Directed by ")
            if (step3 not in directorList):
                directorList.append(step3)
                dataset[step3] = 0
            dataset.at[i, step3] = 1
            
        elif ("With" in step2[0]):
            step4= remove_prefix(step2[0], "With ").split(', ')
        if (len(step2) >= 2):
            if ("With" in step2[1]):
                step4= remove_prefix(step2[1], "With ").split(', ')
            
        
        for j in range (len(step4)):
            if step4[j] not in actorsList:
                actorsList.append(step4[j])
                dataset[step4[j]] = 0
                dataset.at[i, step4[j]] = 1
            else :
                dataset.at[i, step4[j]] = 1
    return actorsList

    
        
def color_Dico():
    genres = set()

    for i in range (len(X1["genres"])):
        for j in range (len(X1["genres"][i].split(","))):
            genres.add(X1["genres"][i].split(",")[j])
    
    rgb_values = sns.color_palette("Set2", 27)
    dico = {}
    for i in range (len(genres)):
        dico[genres.pop()]=rgb_values[i]
    return dico
    

In [None]:
def preprocessing (data ):
    dataframe = data.copy()
    
    dataframe.insert(10, "genre3", "")
    dataframe.insert(10, "genre2", "")
    dataframe.insert(10, "genre1", "")
    
    genre_individual(dataframe)
    directorAndActors(dataframe)
    
    dataframe=dataframe.drop("Unnamed: 0",axis=1)
    dataframe=dataframe.drop("title",axis=1)
    dataframe=dataframe.drop("img_url",axis=1)
    dataframe=dataframe.drop("description",axis=1)
    dataframe=dataframe.drop("genres",axis=1)
    dataframe=dataframe.drop("is_adult",axis=1) #always == to 0
    
    averageTimeByGenre = {}
    for i in range (len (dataframe["genre1"].unique())):
        averageTimeByGenre[dataframe["genre1"].unique()[i]] = 0
    
    for i in range (len (dataframe["runtime"])):
        if (dataframe["runtime"][i] != '\\N'):
            averageTimeByGenre[dataframe["genre1"][i]]+= int(dataframe["runtime"][i])
            
    for i in range (len (dataframe["genre1"].unique())):
        averageTimeByGenre[dataframe["genre1"].unique()[i]] /= dataframe["genre1"].value_counts()[dataframe["genre1"].unique()[i]]     

    for i in range (len (dataframe["genre1"])):
        if dataframe["runtime"][i] == '\\N':
            dataframe["runtime"][i] = int(averageTimeByGenre[dataframe["genre1"][i]])
            
            
    ListOfGenre = list(set(list(dataframe["genre1"].unique()) + list(dataframe["genre2"].unique()) + list(dataframe["genre3"].unique())))
    ListOfGenre.remove("")
    ListOfGenre.remove('\\N')
    
    for i in range (len(ListOfGenre)):
        dataframe[ListOfGenre[i]] = 0

    for i in range (len(dataframe["genre1"])):
        if dataframe["genre1"][i] != "\\N":
            dataframe[dataframe["genre1"][i]][i] = 3
            
    for i in range (len(dataframe["genre2"])):
        if dataframe["genre2"][i] != "\\N" and dataframe["genre2"][i] != "":
            dataframe[dataframe["genre2"][i]][i] = 2
            
    for i in range (len(dataframe["genre3"])):
        if dataframe["genre3"][i] != "\\N" and dataframe["genre3"][i] != "":
            dataframe[dataframe["genre3"][i]][i] = 1
            
    dataframe=dataframe.drop("genre1",axis=1)
    dataframe=dataframe.drop("genre2",axis=1)
    dataframe=dataframe.drop("genre3",axis=1)
    
    ListOfStudios = list(dataframe["studio"].unique())
    for i in range (len(ListOfStudios)):
        dataframe[ListOfStudios[i]] = 0
    for i in range (len(dataframe["studio"])):
        dataframe[dataframe["studio"][i]][i] = 1
    
    dataframe=dataframe.drop("studio",axis=1)
    print("ah")
    for i in range (len(dataframe)):
        LaList = dataframe['text_embeddings'][i][1:][:-1].split(',')
        dataframe['text_embeddings'][i] = [float(item) for item in LaList] 

    for i in range (len(dataframe)):
        LaList = dataframe['img_embeddings'][i][1:][:-1].split(',')
        dataframe['img_embeddings'][i] = [float(item) for item in LaList] 
    
    
    TXT_embeddings_dataframe = pd.DataFrame(dataframe['text_embeddings'].to_list(), columns=list(range(0,len(dataframe['text_embeddings'][1]))))
    IMG_embeddings_dataframe = pd.DataFrame(dataframe['img_embeddings'].to_list(), columns=list(range(0,len(dataframe['img_embeddings'][1]))))
    
    tsne_IMG = TSNE(random_state = 42, n_components=2, perplexity=50).fit_transform(IMG_embeddings_dataframe)
    tsne_TXT = TSNE(random_state = 42, n_components=2, perplexity=50).fit_transform(TXT_embeddings_dataframe)
    
    tsne_IMG_0 = (list(zip(*tsne_IMG))[0])
    tsne_IMG_1 = (list(zip(*tsne_IMG))[1])
    tsne_TXT_0 = (list(zip(*tsne_TXT))[0])
    tsne_TXT_1 = (list(zip(*tsne_TXT))[1])
    
    dataframe["tsne_IMG_0"] = tsne_IMG_0
    dataframe["tsne_IMG_1"] = tsne_IMG_1
    dataframe["tsne_TXT_0"] = tsne_TXT_0
    dataframe["tsne_TXT_1"] = tsne_TXT_1
    
    dataframe=dataframe.drop("img_embeddings",axis=1)
    dataframe=dataframe.drop("text_embeddings",axis=1)
    print("done")
    return dataframe
    
    

In [None]:
Ready = preprocessing(X1)

In [9]:
Ready.head()

Unnamed: 0,ratings,n_votes,production_year,runtime,release_year,Gary Winick,Amanda Seyfried,Marcia DeBonis,Gael García Bernal,Giordano Formenti,...,Atop.,MTop,PARAIN,CSr,Mael.,Equin.,tsne_IMG_0,tsne_IMG_1,tsne_TXT_0,tsne_TXT_1
0,6.5,92937.0,2010,105,2010.0,1,1,1,1,1,...,0,0,0,0,0,0,-33.048904,33.799595,-13.869236,-14.432333
1,7.9,11.0,1996,83,2014.0,0,0,0,0,0,...,0,0,0,0,0,0,52.751656,-41.301567,-27.271257,-19.766323
2,5.9,1345.0,1978,127,1978.0,0,0,0,0,0,...,0,0,0,0,0,0,12.720545,30.886726,13.19154,-12.381777
3,6.6,4851.0,1994,105,1994.0,0,0,0,0,0,...,0,0,0,0,0,0,-8.656157,5.724644,29.965462,29.147537
4,4.1,549.0,1982,108,1982.0,0,0,0,0,0,...,0,0,0,0,0,0,-27.887983,-7.69136,10.637887,5.2161


In [10]:
X2 = pd.read_csv("X2.csv" )
testSet = preprocessing(X2)

63

  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 

127

  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]]

177

  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]]

201

  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[

218

  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
 

274

  dataset[step3] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  da

342

  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step3] = 

399

  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
 

452

  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 

506

  dataset[step3] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  da

547

  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
 

604

  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[

640

  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]]

679

  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
 

728

  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[

782

  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]]

825

  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[ste

853

  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
 

909

  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]]

947

  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[

994

  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
 

1036

  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 

1085

  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  da

1126

  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step3] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  da

1181

  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[ste

1208

  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  da

1237

  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]]

1273

  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 

1295

  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[

1342

  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
 

1392

  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[

1420

  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]]

1457

  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[ste

1508

  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step3] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]] = 0
  dataset[step4[j]]

1517ah


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataframe["runtime"][i] = int(averageTimeByGenre[dataframe["genre1"][i]])


ah


  dataframe[ListOfGenre[i]] = 0
  dataframe[ListOfGenre[i]] = 0
  dataframe[ListOfGenre[i]] = 0
  dataframe[ListOfGenre[i]] = 0
  dataframe[ListOfGenre[i]] = 0
  dataframe[ListOfGenre[i]] = 0
  dataframe[ListOfGenre[i]] = 0
  dataframe[ListOfGenre[i]] = 0
  dataframe[ListOfGenre[i]] = 0
  dataframe[ListOfGenre[i]] = 0
  dataframe[ListOfGenre[i]] = 0
  dataframe[ListOfGenre[i]] = 0
  dataframe[ListOfGenre[i]] = 0
  dataframe[ListOfGenre[i]] = 0
  dataframe[ListOfGenre[i]] = 0
  dataframe[ListOfGenre[i]] = 0
  dataframe[ListOfGenre[i]] = 0
  dataframe[ListOfGenre[i]] = 0
  dataframe[ListOfGenre[i]] = 0
  dataframe[ListOfGenre[i]] = 0
  dataframe[ListOfGenre[i]] = 0
  dataframe[ListOfGenre[i]] = 0
  dataframe[ListOfGenre[i]] = 0
  dataframe[ListOfGenre[i]] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataframe[dataframe["gen

ah


  dataframe[ListOfStudios[i]] = 0
  dataframe[ListOfStudios[i]] = 0
  dataframe[ListOfStudios[i]] = 0
  dataframe[ListOfStudios[i]] = 0
  dataframe[ListOfStudios[i]] = 0
  dataframe[ListOfStudios[i]] = 0
  dataframe[ListOfStudios[i]] = 0
  dataframe[ListOfStudios[i]] = 0
  dataframe[ListOfStudios[i]] = 0
  dataframe[ListOfStudios[i]] = 0
  dataframe[ListOfStudios[i]] = 0
  dataframe[ListOfStudios[i]] = 0
  dataframe[ListOfStudios[i]] = 0
  dataframe[ListOfStudios[i]] = 0
  dataframe[ListOfStudios[i]] = 0
  dataframe[ListOfStudios[i]] = 0
  dataframe[ListOfStudios[i]] = 0
  dataframe[ListOfStudios[i]] = 0
  dataframe[ListOfStudios[i]] = 0
  dataframe[ListOfStudios[i]] = 0
  dataframe[ListOfStudios[i]] = 0
  dataframe[ListOfStudios[i]] = 0
  dataframe[ListOfStudios[i]] = 0
  dataframe[ListOfStudios[i]] = 0
  dataframe[ListOfStudios[i]] = 0
  dataframe[ListOfStudios[i]] = 0
  dataframe[ListOfStudios[i]] = 0
  dataframe[ListOfStudios[i]] = 0
  dataframe[ListOfStudios[i]] = 0
  dataframe[Li

ah


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataframe['text_embeddings'][i] = [float(item) for item in LaList]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataframe['img_embeddings'][i] = [float(item) for item in LaList]


ouh




ahhhhhhh


  dataframe["tsne_IMG_0"] = tsne_IMG_0
  dataframe["tsne_IMG_1"] = tsne_IMG_1
  dataframe["tsne_TXT_0"] = tsne_TXT_0
  dataframe["tsne_TXT_1"] = tsne_TXT_1


done


In [11]:
print(testSet.columns.values.tolist())
allColumns = list(set(testSet.columns.values.tolist() + Ready.columns.values.tolist()))
print(allColumns)

for i in range(len (allColumns)):
    if (allColumns[i] not in testSet.columns.values.tolist()):
        testSet[allColumns[i]]=0
    if (allColumns[i] not in Ready.columns.values.tolist()):
        Ready[allColumns[i]]=0

testSet = testSet.reindex(sorted(testSet.columns), axis=1)
Ready = Ready.reindex(sorted(Ready.columns), axis=1)

['ratings', 'n_votes', 'production_year', 'runtime', 'release_year', 'Isabel Coixet', 'Sarah Polley', 'Amanda Plummer', 'Scott Speedman', 'Leonor Watling', 'Robert Breer', 'Harold D', 'Paul Stanley', 'James Arness', 'Milburn Stone', 'Amanda Blake', 'Ken Curtis', 'Robert Boris', 'Rob Lowe', 'Bill Paxton', 'Randy Travis', 'Dana Wheeler-Nicholson', 'Barry Crane', 'Bill Bixby', 'Jack Colvin', 'Lou Ferrigno', 'Tommy Madden', 'Jim Wilson', 'Harvey Keitel', 'Cameron Diaz', 'Craig Sheffer', 'Billy Zane', 'James Polakof', 'James Daughton', 'Stephen Furst', 'Richard Young', 'Jenny Neumann', 'Manoj Agrawal', 'Govinda', 'Rani Mukerji', 'Johny Lever', 'Paresh Rawal', 'Richard Brooks', 'Diane Keaton', 'Tuesday Weld', 'William Atherton', 'Richard Kiley', 'Christopher Guest', 'Chris Farley', 'Matthew Perry', 'Bokeem Woodbine', 'Barry Del Sherman', 'Kevin Keating', 'Wayne Barrett', 'George Bush', 'George W', 'Liam McGrath', 'Francis Barrett', 'Chick Gillen', 'Tom Humphries', 'Colum Flynn', 'Thomas Vint

  testSet[allColumns[i]]=0
  testSet[allColumns[i]]=0
  testSet[allColumns[i]]=0
  testSet[allColumns[i]]=0
  testSet[allColumns[i]]=0
  testSet[allColumns[i]]=0
  testSet[allColumns[i]]=0
  testSet[allColumns[i]]=0
  testSet[allColumns[i]]=0
  testSet[allColumns[i]]=0
  testSet[allColumns[i]]=0
  testSet[allColumns[i]]=0
  testSet[allColumns[i]]=0
  testSet[allColumns[i]]=0
  testSet[allColumns[i]]=0
  testSet[allColumns[i]]=0
  testSet[allColumns[i]]=0
  testSet[allColumns[i]]=0
  testSet[allColumns[i]]=0
  testSet[allColumns[i]]=0
  testSet[allColumns[i]]=0
  testSet[allColumns[i]]=0
  testSet[allColumns[i]]=0
  testSet[allColumns[i]]=0
  testSet[allColumns[i]]=0
  testSet[allColumns[i]]=0
  testSet[allColumns[i]]=0
  testSet[allColumns[i]]=0
  testSet[allColumns[i]]=0
  testSet[allColumns[i]]=0
  testSet[allColumns[i]]=0
  testSet[allColumns[i]]=0
  testSet[allColumns[i]]=0
  testSet[allColumns[i]]=0
  testSet[allColumns[i]]=0
  testSet[allColumns[i]]=0
  testSet[allColumns[i]]=0
 

In [None]:
Ready

In [12]:
from sklearn.linear_model import LinearRegression
revenue = Y1.pop("revenue")
reg = LinearRegression().fit(Ready, revenue)

In [13]:
Y1 = pd.read_csv("Y1.csv" , header=None , names =["revenue"])
reg.score(Ready, revenue)

1.0

In [14]:
prediction = reg.predict(testSet)


In [19]:
np.savetxt("foo.csv", prediction, delimiter=",")