# Ajustes dos dados com Random Forest

## Importando as bibliotecas

In [1236]:
import pandas as pd
import ast
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from sqlalchemy import create_engine

## Criando a engine de conexão com o banco de dados

In [1206]:
path_db = '../db/filmes.db'

engine = create_engine(f'sqlite:///{path_db}')

# Carregando o DataFrame

Vamos carregar o DataFrame partindo da databela `filmes` no banco de dados. 

In [1207]:
df = pd.read_sql('SELECT * from filmes', con=engine)

# Criando novas colunas

Partindo de que as colunas `genres` e `release_date` do `df` não servirão como `features` para inputar as colunas `budget` e `revenue`, criaremos as colunas numéricas `num_genres` e `release_year`. Para fazermos isso, primeiramente iremos transformar em `lista` os elementos da coluna `genres` que até então são `string de lista`. 

In [1208]:
df['genres'] = df['genres'].apply(ast.literal_eval)

Criando a coluna `num_genres`

In [1209]:
df['num_genres'] = df['genres'].apply(lambda x: len(x) if isinstance(x, list) else 0)

Convertendo a coluna `release_date` para `datetimes`

In [1210]:
df['release_date'] = pd.to_datetime(df['release_date'], errors='coerce')

Criando a coluna `release_year`

In [1211]:
df['release_year'] = df['release_date'].dt.year

In [1212]:
df

Unnamed: 0,id,title,original_title,original_language,overview,budget,revenue,runtime,release_date,genres,popularity,vote_average,vote_count,num_genres,release_year
0,411405,Small Crimes,Small Crimes,en,"A disgraced former cop, fresh off a six-year p...",0.0,0.0,95.0,2017-04-28,"[Drama, Comedy, Thriller, Crime]",7.219022,5.8,55.0,4,2017.0
1,42492,Up the Sandbox,Up the Sandbox,en,"A young wife and mother, bored with day-to-day...",0.0,0.0,97.0,1972-12-21,"[Drama, Comedy]",0.138450,7.3,2.0,2,1972.0
2,12143,Bad Lieutenant,Bad Lieutenant,en,"While investigating a young nun's rape, a corr...",1000000.0,2019469.0,96.0,1992-09-16,"[Crime, Drama]",6.417037,6.9,162.0,2,1992.0
3,9976,Satan's Little Helper,Satan's Little Helper,en,A naïve young boy unknowingly becomes the pawn...,0.0,0.0,100.0,2004-01-01,"[Horror, Romance, Comedy]",2.233189,5.0,42.0,3,2004.0
4,46761,Sitcom,Sitcom,fr,The adventures of an upper-class suburban fami...,0.0,0.0,80.0,1998-05-27,"[Comedy, Drama, Thriller]",1.800582,6.4,27.0,3,1998.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,3093,Basic Instinct 2,Basic Instinct 2,en,Novelist Catherine Tramell is once again in tr...,70000000.0,38629478.0,114.0,2006-03-29,"[Crime, Mystery, Thriller]",15.321902,4.6,183.0,3,2006.0
4996,246127,Every Thing Will Be Fine,Every Thing Will Be Fine,en,"One day, driving aimlessly around the outskirt...",0.0,8034.0,118.0,2015-04-02,[Drama],5.723103,5.3,79.0,1,2015.0
4997,9803,Seven Dwarfs,7 Zwerge - Männer allein im Wald,de,The Seven Dwarves live deep within a female-fr...,0.0,0.0,95.0,2004-10-28,[Comedy],4.582736,5.2,70.0,1,2004.0
4998,336970,True Siblings,Syskonsalt,sv,"The siblings Linus, 19-years-old, who are taki...",0.0,0.0,58.0,2000-09-13,"[Drama, TV Movie]",2.364355,8.0,2.0,2,2000.0


Agora faremos one hot encoder na coluna `original language` para também inserirmos no treinamento do modelo de florestas aleatórias.

In [1213]:
df_encoder = pd.get_dummies(df, columns=['original_language'])

In [1214]:
df_encoder

Unnamed: 0,id,title,original_title,overview,budget,revenue,runtime,release_date,genres,popularity,...,original_language_te,original_language_th,original_language_tl,original_language_tr,original_language_uk,original_language_ur,original_language_uz,original_language_vi,original_language_xx,original_language_zh
0,411405,Small Crimes,Small Crimes,"A disgraced former cop, fresh off a six-year p...",0.0,0.0,95.0,2017-04-28,"[Drama, Comedy, Thriller, Crime]",7.219022,...,False,False,False,False,False,False,False,False,False,False
1,42492,Up the Sandbox,Up the Sandbox,"A young wife and mother, bored with day-to-day...",0.0,0.0,97.0,1972-12-21,"[Drama, Comedy]",0.138450,...,False,False,False,False,False,False,False,False,False,False
2,12143,Bad Lieutenant,Bad Lieutenant,"While investigating a young nun's rape, a corr...",1000000.0,2019469.0,96.0,1992-09-16,"[Crime, Drama]",6.417037,...,False,False,False,False,False,False,False,False,False,False
3,9976,Satan's Little Helper,Satan's Little Helper,A naïve young boy unknowingly becomes the pawn...,0.0,0.0,100.0,2004-01-01,"[Horror, Romance, Comedy]",2.233189,...,False,False,False,False,False,False,False,False,False,False
4,46761,Sitcom,Sitcom,The adventures of an upper-class suburban fami...,0.0,0.0,80.0,1998-05-27,"[Comedy, Drama, Thriller]",1.800582,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,3093,Basic Instinct 2,Basic Instinct 2,Novelist Catherine Tramell is once again in tr...,70000000.0,38629478.0,114.0,2006-03-29,"[Crime, Mystery, Thriller]",15.321902,...,False,False,False,False,False,False,False,False,False,False
4996,246127,Every Thing Will Be Fine,Every Thing Will Be Fine,"One day, driving aimlessly around the outskirt...",0.0,8034.0,118.0,2015-04-02,[Drama],5.723103,...,False,False,False,False,False,False,False,False,False,False
4997,9803,Seven Dwarfs,7 Zwerge - Männer allein im Wald,The Seven Dwarves live deep within a female-fr...,0.0,0.0,95.0,2004-10-28,[Comedy],4.582736,...,False,False,False,False,False,False,False,False,False,False
4998,336970,True Siblings,Syskonsalt,"The siblings Linus, 19-years-old, who are taki...",0.0,0.0,58.0,2000-09-13,"[Drama, TV Movie]",2.364355,...,False,False,False,False,False,False,False,False,False,False


# Inputando as colunas Budget e Revenue

In [1215]:
df_encoder.columns

Index(['id', 'title', 'original_title', 'overview', 'budget', 'revenue',
       'runtime', 'release_date', 'genres', 'popularity', 'vote_average',
       'vote_count', 'num_genres', 'release_year', 'original_language_ar',
       'original_language_ay', 'original_language_bg', 'original_language_bn',
       'original_language_ca', 'original_language_cn', 'original_language_cs',
       'original_language_da', 'original_language_de', 'original_language_el',
       'original_language_en', 'original_language_es', 'original_language_et',
       'original_language_fa', 'original_language_fi', 'original_language_fr',
       'original_language_he', 'original_language_hi', 'original_language_hr',
       'original_language_hu', 'original_language_id', 'original_language_is',
       'original_language_it', 'original_language_iu', 'original_language_ja',
       'original_language_ka', 'original_language_ko', 'original_language_lv',
       'original_language_mk', 'original_language_ml', 'original_la

Aqui temos as colunas de `features` que servirão para o treinamento e previsão do modelo.

In [1216]:
features = ['runtime', 'popularity', 'vote_average', 'vote_count', 'num_genres', 'release_year', 'original_language_ar',
       'original_language_ay', 'original_language_bg', 'original_language_bn',
       'original_language_ca', 'original_language_cn', 'original_language_cs',
       'original_language_da', 'original_language_de', 'original_language_el',
       'original_language_en', 'original_language_es', 'original_language_et',
       'original_language_fa', 'original_language_fi', 'original_language_fr',
       'original_language_he', 'original_language_hi', 'original_language_hr',
       'original_language_hu', 'original_language_id', 'original_language_is',
       'original_language_it', 'original_language_iu', 'original_language_ja',
       'original_language_ka', 'original_language_ko', 'original_language_lv',
       'original_language_mk', 'original_language_ml', 'original_language_mn',
       'original_language_mr', 'original_language_ms', 'original_language_nb',
       'original_language_nl', 'original_language_no', 'original_language_pa',
       'original_language_pl', 'original_language_ps', 'original_language_pt',
       'original_language_ro', 'original_language_ru', 'original_language_sk',
       'original_language_sl', 'original_language_sq', 'original_language_sr',
       'original_language_sv', 'original_language_ta', 'original_language_te',
       'original_language_th', 'original_language_tl', 'original_language_tr',
       'original_language_uk', 'original_language_ur', 'original_language_uz',
       'original_language_vi', 'original_language_xx', 'original_language_zh']

Antes de inputar as colunas vamos avaliar o desempenho do modelo de Florestas Aleatórias pelo Erro Absoluto Médio `(MAE)`. Essa métrica é uma das mais indicadas para inputar valores em colunas por regressão de Florestas Aleatórias, pois são robustos a outliers já que tratam todos os erros da mesma forma controlando o erro médio. 

In [1217]:
def avaliar_modelo(df, coluna_alvo):
    
    df_completo = df[df[coluna_alvo] != 0]
    
    df_train, df_test = train_test_split(df_completo, test_size=0.2, random_state=42)
    
    X_train = df_train.drop(columns=[coluna_alvo])
    Y_train = df_train[coluna_alvo]
    
    X_test = df_test.drop(columns=[coluna_alvo])
    Y_test = df_test[coluna_alvo]
    
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, Y_train)
    
    Y_pred = model.predict(X_test)
    
    mae = mean_absolute_error(Y_pred, Y_test)
    
    return mae

Temos, portanto, que para a coluna `budget` o modelo erra cerca de `R$ 27414697.98` ao prever o valor na inputação. Para termos uma melhor noção se esse erro é bom ou ruim precisamos comparar com o valor típico (média) dos orçamentos.  

In [1218]:
mae_budget = avaliar_modelo(df_encoder[features + ['budget']], 'budget')
mae_budget

27414697.97788889

In [1219]:
budeget_test = df_encoder[df_encoder['budget'] != 0]

In [1220]:
budeget_test.mean(numeric_only=True)

id                      7.976318e+04
budget                  1.316451e+08
revenue                 4.835092e+07
runtime                 1.046956e+02
popularity              7.749254e+00
                            ...     
original_language_ur    0.000000e+00
original_language_uz    0.000000e+00
original_language_vi    1.111111e-03
original_language_xx    0.000000e+00
original_language_zh    3.333333e-03
Length: 67, dtype: float64

Obtemos a média de `budget` no valor de `R$ 131.645.100` que é compatível para orçamentos de filmes e fazendo a comparação com o MAE de budget temos um __erro__ __relativo__ de aproximadamente `21%`. Isso significa que aproximadamente o modelo erra `21%` do valor médio do orçamento e isso é aceitavel em dados financeiros ou orçamentos que possuem alta variabilidade. Logo, a previsão não é muito precisa mas o modelo está capturando padrões úteis nos dados. Podemos dizer que a previsão está boa.

In [1221]:
relative_mae = mae_budget / (budeget_test['budget'].mean())

print(f"Erro relativo médio: {relative_mae:.2%}")

Erro relativo médio: 20.82%


Já na coluna `revenue` o modelo erra aproximadamente `R$ 36341298.47` ao fazer uma previsão.`

In [1222]:
mae_revenue = avaliar_modelo(df_encoder[features + ['revenue']], 'revenue')
mae_revenue

36341298.4694375

Fazendo a comparação com a média dos valores em `revenue` obtemos um erro relativo de aproximadamente `60%` o que é um valor bastante alto mas isso condiz com a grande variabilidade dos dados nessa coluna pois temos um alto desvio padrão de __R$ 123 milhões__, aproximadamente. Isso significa que há uma grande dispersão dos dados em torno da média o que é comum em datasets de filmes com uns arrecadando poucos milhões e outros bilhões e isso dificulta previsões mais precisas de modelos com erros baixos. Como o MAE de `revenue` é aproximadamente `R$ 36341298.47` o modelo está errando menos do que a variabilidade natural dos dados, ou seja, está aprendendo algo útil e não apenas chutando valores. O MAE de `revenue` é grande mas razoável em relação a disperção dos dados.

In [1223]:
revenue_test = df_encoder[df_encoder['revenue'] != 0]

In [1224]:
revenue_test.describe()

Unnamed: 0,id,budget,revenue,runtime,release_date,popularity,vote_average,vote_count,num_genres,release_year
count,798.0,798.0,798.0,798.0,780,798.0,798.0,798.0,798.0,780.0
mean,66150.689223,19685000.0,60711000.0,107.892231,1999-06-14 01:50:46.153846144,8.655179,6.198496,517.817043,2.464912,1998.910256
min,13.0,0.0,-50000.0,0.0,1918-08-01 00:00:00,0.001223,0.0,0.0,0.0,1918.0
25%,9570.75,0.0,1917916.0,94.0,1992-11-22 00:00:00,3.851706,5.625,36.0,2.0,1992.0
50%,19121.0,5000000.0,15799410.0,102.5,2004-01-17 12:00:00,7.419201,6.3,152.5,2.0,2003.5
75%,70766.25,25000000.0,63722000.0,119.0,2011-02-16 00:00:00,10.743956,6.9,522.5,3.0,2011.0
max,428449.0,260000000.0,1156731000.0,216.0,2017-08-04 00:00:00,547.488298,8.5,8670.0,6.0,2017.0
std,97616.195715,33370150.0,123312200.0,22.236178,,20.578149,1.078185,995.938961,1.090922,16.789328


In [1225]:
relative_mae = mae_revenue/revenue_test['revenue'].mean()

print(f"Erro relativo médio: {relative_mae:.2%}")

Erro relativo médio: 59.86%


Agora vamos inputar as colunas `budget` e `revenue` com o modelo. 

In [1226]:
def inputar_coluna(df, coluna_alvo, colunas):
    
    valido = ~df[colunas].isna().any(axis=1)
    
    X_train = df.loc[valido & (df[coluna_alvo] != 0), colunas]
    Y_train = df.loc[valido & (df[coluna_alvo] != 0), coluna_alvo]
    X_pred = df.loc[valido & (df[coluna_alvo] == 0), colunas]
    
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, Y_train)
    
    Y_pred = model.predict(X_pred)
    
    df.loc[X_pred.index, coluna_alvo] = Y_pred
    
    return df

In [1227]:
df_budget = inputar_coluna(df_encoder, 'budget', features)
df_budget

Unnamed: 0,id,title,original_title,overview,budget,revenue,runtime,release_date,genres,popularity,...,original_language_te,original_language_th,original_language_tl,original_language_tr,original_language_uk,original_language_ur,original_language_uz,original_language_vi,original_language_xx,original_language_zh
0,411405,Small Crimes,Small Crimes,"A disgraced former cop, fresh off a six-year p...",12365792.57,0.0,95.0,2017-04-28,"[Drama, Comedy, Thriller, Crime]",7.219022,...,False,False,False,False,False,False,False,False,False,False
1,42492,Up the Sandbox,Up the Sandbox,"A young wife and mother, bored with day-to-day...",629789.52,0.0,97.0,1972-12-21,"[Drama, Comedy]",0.138450,...,False,False,False,False,False,False,False,False,False,False
2,12143,Bad Lieutenant,Bad Lieutenant,"While investigating a young nun's rape, a corr...",1000000.00,2019469.0,96.0,1992-09-16,"[Crime, Drama]",6.417037,...,False,False,False,False,False,False,False,False,False,False
3,9976,Satan's Little Helper,Satan's Little Helper,A naïve young boy unknowingly becomes the pawn...,5580448.55,0.0,100.0,2004-01-01,"[Horror, Romance, Comedy]",2.233189,...,False,False,False,False,False,False,False,False,False,False
4,46761,Sitcom,Sitcom,The adventures of an upper-class suburban fami...,2930791.96,0.0,80.0,1998-05-27,"[Comedy, Drama, Thriller]",1.800582,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,3093,Basic Instinct 2,Basic Instinct 2,Novelist Catherine Tramell is once again in tr...,70000000.00,38629478.0,114.0,2006-03-29,"[Crime, Mystery, Thriller]",15.321902,...,False,False,False,False,False,False,False,False,False,False
4996,246127,Every Thing Will Be Fine,Every Thing Will Be Fine,"One day, driving aimlessly around the outskirt...",15751313.18,8034.0,118.0,2015-04-02,[Drama],5.723103,...,False,False,False,False,False,False,False,False,False,False
4997,9803,Seven Dwarfs,7 Zwerge - Männer allein im Wald,The Seven Dwarves live deep within a female-fr...,12795408.80,0.0,95.0,2004-10-28,[Comedy],4.582736,...,False,False,False,False,False,False,False,False,False,False
4998,336970,True Siblings,Syskonsalt,"The siblings Linus, 19-years-old, who are taki...",3307811.75,0.0,58.0,2000-09-13,"[Drama, TV Movie]",2.364355,...,False,False,False,False,False,False,False,False,False,False


In [1228]:
(df_budget['budget'] == 0).sum()

87

In [1229]:
df_revenue = inputar_coluna(df_encoder, 'revenue', features)
df_revenue

Unnamed: 0,id,title,original_title,overview,budget,revenue,runtime,release_date,genres,popularity,...,original_language_te,original_language_th,original_language_tl,original_language_tr,original_language_uk,original_language_ur,original_language_uz,original_language_vi,original_language_xx,original_language_zh
0,411405,Small Crimes,Small Crimes,"A disgraced former cop, fresh off a six-year p...",12365792.57,3111954.14,95.0,2017-04-28,"[Drama, Comedy, Thriller, Crime]",7.219022,...,False,False,False,False,False,False,False,False,False,False
1,42492,Up the Sandbox,Up the Sandbox,"A young wife and mother, bored with day-to-day...",629789.52,1354322.31,97.0,1972-12-21,"[Drama, Comedy]",0.138450,...,False,False,False,False,False,False,False,False,False,False
2,12143,Bad Lieutenant,Bad Lieutenant,"While investigating a young nun's rape, a corr...",1000000.00,2019469.00,96.0,1992-09-16,"[Crime, Drama]",6.417037,...,False,False,False,False,False,False,False,False,False,False
3,9976,Satan's Little Helper,Satan's Little Helper,A naïve young boy unknowingly becomes the pawn...,5580448.55,11719114.63,100.0,2004-01-01,"[Horror, Romance, Comedy]",2.233189,...,False,False,False,False,False,False,False,False,False,False
4,46761,Sitcom,Sitcom,The adventures of an upper-class suburban fami...,2930791.96,6196771.07,80.0,1998-05-27,"[Comedy, Drama, Thriller]",1.800582,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,3093,Basic Instinct 2,Basic Instinct 2,Novelist Catherine Tramell is once again in tr...,70000000.00,38629478.00,114.0,2006-03-29,"[Crime, Mystery, Thriller]",15.321902,...,False,False,False,False,False,False,False,False,False,False
4996,246127,Every Thing Will Be Fine,Every Thing Will Be Fine,"One day, driving aimlessly around the outskirt...",15751313.18,8034.00,118.0,2015-04-02,[Drama],5.723103,...,False,False,False,False,False,False,False,False,False,False
4997,9803,Seven Dwarfs,7 Zwerge - Männer allein im Wald,The Seven Dwarves live deep within a female-fr...,12795408.80,16461042.70,95.0,2004-10-28,[Comedy],4.582736,...,False,False,False,False,False,False,False,False,False,False
4998,336970,True Siblings,Syskonsalt,"The siblings Linus, 19-years-old, who are taki...",3307811.75,25709208.27,58.0,2000-09-13,"[Drama, TV Movie]",2.364355,...,False,False,False,False,False,False,False,False,False,False


In [1230]:
(df_revenue['revenue'] == 0).sum()

96

# Salvando o novo dataset no banco de dados

Primeiro vamos desfazer o one hot encoder para a coluna de `original_language` de antes.

In [1231]:
df[['budget', 'revenue']] = df_revenue[['budget', 'revenue']]

In [1232]:
df

Unnamed: 0,id,title,original_title,original_language,overview,budget,revenue,runtime,release_date,genres,popularity,vote_average,vote_count,num_genres,release_year
0,411405,Small Crimes,Small Crimes,en,"A disgraced former cop, fresh off a six-year p...",12365792.57,3111954.14,95.0,2017-04-28,"[Drama, Comedy, Thriller, Crime]",7.219022,5.8,55.0,4,2017.0
1,42492,Up the Sandbox,Up the Sandbox,en,"A young wife and mother, bored with day-to-day...",629789.52,1354322.31,97.0,1972-12-21,"[Drama, Comedy]",0.138450,7.3,2.0,2,1972.0
2,12143,Bad Lieutenant,Bad Lieutenant,en,"While investigating a young nun's rape, a corr...",1000000.00,2019469.00,96.0,1992-09-16,"[Crime, Drama]",6.417037,6.9,162.0,2,1992.0
3,9976,Satan's Little Helper,Satan's Little Helper,en,A naïve young boy unknowingly becomes the pawn...,5580448.55,11719114.63,100.0,2004-01-01,"[Horror, Romance, Comedy]",2.233189,5.0,42.0,3,2004.0
4,46761,Sitcom,Sitcom,fr,The adventures of an upper-class suburban fami...,2930791.96,6196771.07,80.0,1998-05-27,"[Comedy, Drama, Thriller]",1.800582,6.4,27.0,3,1998.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,3093,Basic Instinct 2,Basic Instinct 2,en,Novelist Catherine Tramell is once again in tr...,70000000.00,38629478.00,114.0,2006-03-29,"[Crime, Mystery, Thriller]",15.321902,4.6,183.0,3,2006.0
4996,246127,Every Thing Will Be Fine,Every Thing Will Be Fine,en,"One day, driving aimlessly around the outskirt...",15751313.18,8034.00,118.0,2015-04-02,[Drama],5.723103,5.3,79.0,1,2015.0
4997,9803,Seven Dwarfs,7 Zwerge - Männer allein im Wald,de,The Seven Dwarves live deep within a female-fr...,12795408.80,16461042.70,95.0,2004-10-28,[Comedy],4.582736,5.2,70.0,1,2004.0
4998,336970,True Siblings,Syskonsalt,sv,"The siblings Linus, 19-years-old, who are taki...",3307811.75,25709208.27,58.0,2000-09-13,"[Drama, TV Movie]",2.364355,8.0,2.0,2,2000.0


Vamos mudar os registros em `genres` para `strings` para salvarmos no banco de dados já que não é permitido salvar como listas. 

In [1233]:
df['genres'] = df['genres'].apply(lambda x: str(x))

In [1234]:
df

Unnamed: 0,id,title,original_title,original_language,overview,budget,revenue,runtime,release_date,genres,popularity,vote_average,vote_count,num_genres,release_year
0,411405,Small Crimes,Small Crimes,en,"A disgraced former cop, fresh off a six-year p...",12365792.57,3111954.14,95.0,2017-04-28,"['Drama', 'Comedy', 'Thriller', 'Crime']",7.219022,5.8,55.0,4,2017.0
1,42492,Up the Sandbox,Up the Sandbox,en,"A young wife and mother, bored with day-to-day...",629789.52,1354322.31,97.0,1972-12-21,"['Drama', 'Comedy']",0.138450,7.3,2.0,2,1972.0
2,12143,Bad Lieutenant,Bad Lieutenant,en,"While investigating a young nun's rape, a corr...",1000000.00,2019469.00,96.0,1992-09-16,"['Crime', 'Drama']",6.417037,6.9,162.0,2,1992.0
3,9976,Satan's Little Helper,Satan's Little Helper,en,A naïve young boy unknowingly becomes the pawn...,5580448.55,11719114.63,100.0,2004-01-01,"['Horror', 'Romance', 'Comedy']",2.233189,5.0,42.0,3,2004.0
4,46761,Sitcom,Sitcom,fr,The adventures of an upper-class suburban fami...,2930791.96,6196771.07,80.0,1998-05-27,"['Comedy', 'Drama', 'Thriller']",1.800582,6.4,27.0,3,1998.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,3093,Basic Instinct 2,Basic Instinct 2,en,Novelist Catherine Tramell is once again in tr...,70000000.00,38629478.00,114.0,2006-03-29,"['Crime', 'Mystery', 'Thriller']",15.321902,4.6,183.0,3,2006.0
4996,246127,Every Thing Will Be Fine,Every Thing Will Be Fine,en,"One day, driving aimlessly around the outskirt...",15751313.18,8034.00,118.0,2015-04-02,['Drama'],5.723103,5.3,79.0,1,2015.0
4997,9803,Seven Dwarfs,7 Zwerge - Männer allein im Wald,de,The Seven Dwarves live deep within a female-fr...,12795408.80,16461042.70,95.0,2004-10-28,['Comedy'],4.582736,5.2,70.0,1,2004.0
4998,336970,True Siblings,Syskonsalt,sv,"The siblings Linus, 19-years-old, who are taki...",3307811.75,25709208.27,58.0,2000-09-13,"['Drama', 'TV Movie']",2.364355,8.0,2.0,2,2000.0


Agora vamos salvar o novo dataframe no banco de dados.

In [1235]:
df.to_sql('filmes', con=engine, if_exists='replace', index=False)

5000