# Deployment: importing libraries, data and the model

In [18]:
# Upgrade the scikit-learn library to the latest version
!pip install --upgrade scikit-learn



In [19]:
# Import necessary libraries: joblib, pandas, RobustScaler, and PCA
from joblib import load
import pandas as pd
from sklearn.preprocessing import RobustScaler
from sklearn.decomposition import PCA

In [20]:
# Download the two deployment artifacts from Google Drive using their file IDs.
# First ID  -> df_final.csv (the dataset that is scored below)
# Second ID -> RandomForestModel.joblib (the saved Random Forest model)
!gdown 1QgnJLMk3bYuzMRx8QN8KWp4eaFyCtX-Z
!gdown 1DBsgpSCfNvYo0NH2RVoO7XbCcrB8YZZ7

Downloading...
From: https://drive.google.com/uc?id=1QgnJLMk3bYuzMRx8QN8KWp4eaFyCtX-Z
To: /content/df_final.csv
100% 5.63M/5.63M [00:00<00:00, 175MB/s]
Downloading...
From: https://drive.google.com/uc?id=1DBsgpSCfNvYo0NH2RVoO7XbCcrB8YZZ7
To: /content/RandomForestModel.joblib
100% 18.1M/18.1M [00:00<00:00, 180MB/s]


In [21]:
# Locations of the artifacts fetched by the download cell
MODEL_PATH = '/content/RandomForestModel.joblib'
DATA_PATH = '/content/df_final.csv'

# Restore the persisted Random Forest model.
# joblib files are pickle-based: only load files from a trusted source.
rf_loaded = load(MODEL_PATH)

# Read the dataset to score into a DataFrame
df_final = pd.read_csv(DATA_PATH)

# Making the prediction

In [22]:
# Remove the leftover 'Unnamed: 0' column from df_final.
# Rebinding (instead of inplace=True) together with errors='ignore' keeps this
# cell safe to re-run: a second execution no longer raises KeyError because the
# column is already gone.
df_final = df_final.drop(columns='Unnamed: 0', errors='ignore')

In [23]:
# Build the feature matrix X: every column of df_final except the
# customer identifier and the churn label
feature_exclusions = ['id', 'churn']
X = df_final.drop(feature_exclusions, axis=1)

In [24]:
# Score every row of X with the loaded Random Forest model (rf_loaded).
# predict_proba returns per-class probabilities (one row per sample, one entry
# per class) rather than hard class labels; the next cell picks the entry at index 1.
previsionRF = rf_loaded.predict_proba(X)

In [25]:
# Take the probability at class index 1 for every sample with a single
# vectorised column slice instead of a Python-level loop over the rows.
# .tolist() keeps prob_predict a plain list, as downstream cells expect.
# NOTE(review): assumes index 1 of the model's classes is the churn class -- confirm
# against rf_loaded.classes_.
prob_predict = previsionRF[:, 1].tolist()

# Table merge

In [26]:
# Single-column DataFrame holding the customer ids taken from 'df_final'
id_col = df_final['id'].to_frame()

# DataFrame view of the feature matrix 'X'
x = pd.DataFrame(X)

# Single-column DataFrame holding the predicted probabilities from 'prob_predict'
df_prob = pd.DataFrame(prob_predict)

# Put the ids in front of the features, side by side, to form 'df_final_s'
merge_parts = [id_col, x]
df_final_s = pd.concat(merge_parts, axis=1)

# Display the resulting column names
df_final_s.columns

Index(['id', 'cons_12m', 'cons_gas_12m', 'cons_last_month',
       'forecast_base_bill_ele', 'forecast_cons_12m', 'forecast_cons_year',
       'forecast_discount_energy', 'forecast_meter_rent_12m',
       'forecast_price_energy_p1', 'forecast_price_energy_p2',
       'forecast_price_pow_p1', 'has_gas', 'imp_cons', 'margin_gross_pow_ele',
       'margin_net_pow_ele', 'nb_prod_act', 'net_margin', 'num_years_antig',
       'pow_max', 'channel_sales_epumfxlbckeskwekxbiuasklxalciiuu',
       'channel_sales_ewpakwlliwisiwduibdlfmalxowmwpci',
       'channel_sales_fixdbufsefwooaasfcxdxadsiekoceaa',
       'channel_sales_foosdfpfkusacimwkcsosbicdxkicaua',
       'channel_sales_lmkebamcaaclubfxadlmueccxoimlema',
       'channel_sales_no_fill',
       'channel_sales_sddiedcslfslkckwlfkdpoeeailfpeds',
       'channel_sales_usilxuppasemubllopkaafesmlibmsdf',
       'origin_up_ewxeelcelemmiwuafmddpobolfuxioce',
       'origin_up_kamkkxfxxuwbdslkwifmmcsiusiuosws',
       'origin_up_ldkssxwpmemidmece

In [None]:
# Attach the predicted probability (the only column of df_prob) to the merged table
df_final_s['Churn Probability'] = df_prob.iloc[:, 0]

# Columns kept in the report: the customer id, a handful of selected features
# and the churn probability
report_columns = [
    'id',
    'net_margin',
    'price_p1_var',
    'price_p1_fix',
    'num_years_antig',
    'cons_12m',
    'consumption',
    'Churn Probability',
]

# NOTE: this rebinds df_final to the reduced table, so the raw data must be
# reloaded before any earlier cell that expects the original columns is re-run.
df_final = df_final_s[report_columns]

# Exporting

In [None]:
# Write the 'df_final' DataFrame to an Excel workbook.
# The context manager saves and closes the file on exit; the explicit
# ExcelWriter.save() call is deprecated and no longer available in pandas 2.x.
with pd.ExcelWriter('Predict_Churn.xlsx') as datatoexcel:
    df_final.to_excel(datatoexcel)

# Download the Excel file using Google Colab
from google.colab import files
files.download('Predict_Churn.xlsx')

  datatoexcel.save()


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>