In [1]:
import pickle
import numpy as np
import pandas as pd
from utility.visualization import dealing_null_value

In [2]:
# Feature columns that data_preprocess selects (in this order) for the model input.
IMPORTANT_FEATURE = [
    'budget',
    'popularity_log',
    'budget_log',
    'cast_size',
    'crew_size',
    'runtime',
    'number_of_keywords',
    'has_collection',
    'has_homepage',
    'number_of_languages',
]

In [3]:
def file_reader(path):
    """Load the CSV located at ``path`` and hand it back as a pandas DataFrame.

    ``path`` is forwarded unchanged to ``pd.read_csv``.
    """
    frame = pd.read_csv(path)
    return frame

def load_model(path='../model/best_model.pkl'):
    """Unpickle and return a trained model.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the pickled model. Defaults to ``'../model/best_model.pkl'``,
        the file this function has always loaded, so existing ``load_model()``
        calls behave as before.

    Returns
    -------
    object
        Whatever object was pickled into ``path``.

    Raises
    ------
    OSError
        If ``path`` cannot be opened (e.g. ``FileNotFoundError``).
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file;
    # only point this at model files that come from a trusted source.
    with open(path, 'rb') as f:
        model = pickle.load(f)
    return model

def data_preprocess(data):
    """Build the model's feature frame from a raw DataFrame.

    The work is done on a copy of ``data`` so the derived columns are not
    added to the caller's frame. Derived columns:

    * ``budget_log`` / ``popularity_log``: ``np.log1p`` of ``budget`` /
      ``popularity``.
    * ``cast_size``, ``crew_size``, ``number_of_keywords``,
      ``number_of_languages``: ``len`` of every entry of ``cast``, ``crew``,
      ``Keywords`` and ``spoken_languages`` after each of those columns has
      been passed through ``dealing_null_value``.
    * ``has_homepage`` / ``has_collection``: 1 where ``homepage`` /
      ``belongs_to_collection`` is non-null, otherwise 0.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide every raw column named above, plus any column listed in
        ``IMPORTANT_FEATURE`` that is not derived here.

    Returns
    -------
    pandas.DataFrame or None
        The ``IMPORTANT_FEATURE`` columns with missing values replaced by 0.
        If any step fails, a message describing the requirement and the
        underlying error is printed and ``None`` is returned.
    """
    try:
        # Copy first: the original implementation wrote the new columns
        # straight into the caller's DataFrame.
        features = data.copy()

        features['budget_log'] = np.log1p(features['budget'])
        features['popularity_log'] = np.log1p(features['popularity'])

        # NOTE(review): dealing_null_value is a project helper; it is assumed
        # to return a frame whose entries in the given column support len().
        for column in ['cast', 'crew', 'Keywords', 'spoken_languages']:
            features = dealing_null_value(features, column)

        size_features = {
            'cast_size': 'cast',
            'crew_size': 'crew',
            'number_of_keywords': 'Keywords',
            'number_of_languages': 'spoken_languages',
        }
        for feature_name, column in size_features.items():
            features[feature_name] = [len(entry) for entry in features[column].values]

        # Presence flags: 1 when the raw value is not null, 0 otherwise.
        features['has_homepage'] = features['homepage'].notna().astype(int)
        features['has_collection'] = features['belongs_to_collection'].notna().astype(int)

        return features[IMPORTANT_FEATURE].fillna(0)

    except Exception as ex:
        # Best-effort contract kept (print + return None), but the real cause
        # is now reported instead of being discarded.
        print('Must pass the dataframe which contain budget, popularity, cast, crew, '
              'Keywords, spoken_languages, homepage and belongs_to_collection '
              f'({type(ex).__name__}: {ex})')
        return None
    
    
def predict_values(data, is_path=True):
    """Predict revenue for every row of a dataset.

    Parameters
    ----------
    data : str, path-like or pandas.DataFrame
        Either the location of a CSV file to read with ``file_reader`` or an
        already-loaded DataFrame. It must contain an ``id`` column and the raw
        columns ``data_preprocess`` needs.
    is_path : bool, optional
        When True (default) ``data`` is read from disk first. A DataFrame is
        always used as-is, even if this flag was left at its default.

    Returns
    -------
    pandas.DataFrame
        Two columns: ``id`` (taken from the input) and ``revenue``
        (``np.expm1`` of the model's prediction).

    Raises
    ------
    ValueError
        If ``data_preprocess`` could not build the feature frame.
    """
    # Only read from disk when we were not already handed a DataFrame;
    # previously a DataFrame passed with the default flag went to read_csv.
    if is_path and not isinstance(data, pd.DataFrame):
        data = file_reader(data)

    model = load_model()

    features = data_preprocess(data)
    if features is None:
        # data_preprocess signals failure with None; stop here with a clear
        # error instead of letting model.predict(None) fail obscurely.
        raise ValueError(
            'Could not build model features from the input data; '
            'see the message printed by data_preprocess for the cause.'
        )

    log_revenue = model.predict(features)
    # Predictions are mapped back with expm1 -- presumably the model was
    # trained on log1p(revenue); confirm against the training code.
    pred_revenue = np.expm1(log_revenue)
    return pd.DataFrame(dict(id=data.id, revenue=pred_revenue))

In [4]:
# Use predict_values to get revenue predictions for new samples.

# Pass either the path of the CSV file to score, or an already-loaded DataFrame.

pred_value = predict_values('../data/ML 1.csv')

# or

dataframe = file_reader('../data/ML 1.csv')
pred_value = predict_values(dataframe, is_path=False)