## Merging the different datasets
#### Prerequisites:
- The data must already have been cleaned and preprocessed, i.e. reduced to the same number of rows.
- The preprocessed data can be found in each respective folder under the `clean` folder.


In [13]:
import pandas as pd
from src.utils import fetch_data, preprocess_data, generate_dataframes

In [14]:
# Keys name the data sources (used below as dataframe keys and column prefixes);
# each value is the value-column name handed to generate_dataframes for that source.
value_columns = {
    'Cities': 'Views',
    'Content type': 'Views',
    'Device type': 'Views',
    'Geography': 'Views',
    'New and returning viewers': 'Views',
    'Operating system': 'Views',
    'Sharing service': 'Shares',
}


In [15]:
# Fetch the 'Chart data' table of the 'Cities' source via the project helper
# (this line only fetches; no preprocessing is performed here).
# NOTE(review): the third positional argument (True) presumably selects the cleaned
# data, mirroring generate_dataframes(clean_data=True) below — confirm in src.utils.
# NOTE(review): cities_clean_chart_df is not referenced by any later cell visible here.
cities_clean_chart_df = fetch_data('Cities', 'Chart data', True)

In [16]:
# Build the per-source dataframes for every entry of value_columns (clean_data=True).
# Later cells index the result by the keys of value_columns and merge the frames
# on their 'Date' column.
# NOTE(review): the actual reshaping lives in src.utils.generate_dataframes and is
# not visible in this notebook.
reshaped_dataframes = generate_dataframes(value_columns, clean_data=True)

### Merge the data on date

In [17]:
def merge_dataframes(reshaped_dataframes, value_columns):
    """Outer-join the per-source dataframes on their 'Date' column.

    Args:
        reshaped_dataframes: Mapping from source name to a dataframe that has
            a 'Date' column.
        value_columns: Mapping whose keys (in iteration order) select which
            dataframes are merged and in what order.

    Returns:
        The merged dataframe. When ``value_columns`` has a single key, the
        corresponding dataframe is returned as-is (no copy is made).

    Raises:
        IndexError: If ``value_columns`` is empty.
    """
    ordered_names = [*value_columns.keys()]

    # The first frame seeds the result; every remaining frame is joined onto it.
    combined = reshaped_dataframes[ordered_names[0]]
    for source_name in ordered_names[1:]:
        next_frame = reshaped_dataframes[source_name]
        # how='outer' keeps dates that appear in only one of the two frames.
        combined = pd.merge(combined, next_frame, on='Date', how='outer')

    return combined

# First merge attempt, using the dataframes with their original column names.
merged_df = merge_dataframes(
    reshaped_dataframes=reshaped_dataframes,
    value_columns=value_columns,
)

In [18]:
# Preview the first rows. Two sources each contribute a column named 'Other', so
# pandas disambiguates them with the _x / _y suffixes (Other_x, Other_y) in this merge.
merged_df.head()

Unnamed: 0,Date,0x164b85cef5ab402d:0x8467b6b037a24d49,0x168e8fde9837cabf:0x191f55de7e67db40,0x3397ba0942ef7375:0x4a9a32d9fe083d40,0x3568eb6de823cd35:0x35d8cb74247108a7,0x487a4d4c5226f5db:0xd9be143804fe6baa,Other_x,Videos,Computer,Mobile phone,...,iOS,Copy to Clipboard,Facebook,Gmail,LinkedIn,Other_y,Share to WhatsApp Business,Text Message,Twitter,WhatsApp
0,2020-06-28,0,0,0,0,0,0,1,1,0,...,0,0,0,0,0,0,0,0,0,0
1,2020-06-29,0,0,0,0,0,0,72,49,23,...,0,1,0,0,0,0,0,0,0,0
2,2020-06-30,0,0,0,0,0,0,76,62,14,...,0,0,0,0,0,0,0,0,0,0
3,2020-07-01,0,0,0,0,0,0,70,46,24,...,0,0,0,0,0,0,0,0,0,0
4,2020-07-02,0,0,0,0,0,0,57,29,28,...,0,0,0,0,0,0,0,0,0,0


In [19]:
def rename_columns(dataframes, value_columns):
    """Return copies of the dataframes with source-prefixed column names.

    Every column except 'Date' is renamed to ``"<source name>_<column>"`` so
    that columns coming from different sources cannot collide when merged.

    Args:
        dataframes: Mapping from source name to dataframe.
        value_columns: Mapping whose keys select which dataframes to process.

    Returns:
        A new dict, keyed like ``value_columns``, holding the renamed copies.
        The input dataframes are left unmodified.
    """
    def _with_prefix(source_name):
        # Work on a copy so the caller's dataframe keeps its original columns.
        prefixed = dataframes[source_name].copy()
        prefixed.columns = [
            col if col == 'Date' else f"{source_name}_{col}"
            for col in prefixed.columns
        ]
        return prefixed

    return {
        source_name: _with_prefix(source_name)
        for source_name in value_columns.keys()
    }

# Prefix every non-'Date' column with its source name so that equally named
# columns from different sources stay distinguishable.
renamed_dataframes = rename_columns(
    dataframes=reshaped_dataframes,
    value_columns=value_columns,
)

# Merge again, this time on the prefixed dataframes.
merged_df = merge_dataframes(
    reshaped_dataframes=renamed_dataframes,
    value_columns=value_columns,
)

In [20]:
# Preview the first rows after prefixing: every column except 'Date' now starts
# with the name of the source it came from.
merged_df.head()

Unnamed: 0,Date,Cities_0x164b85cef5ab402d:0x8467b6b037a24d49,Cities_0x168e8fde9837cabf:0x191f55de7e67db40,Cities_0x3397ba0942ef7375:0x4a9a32d9fe083d40,Cities_0x3568eb6de823cd35:0x35d8cb74247108a7,Cities_0x487a4d4c5226f5db:0xd9be143804fe6baa,Content type_Other,Content type_Videos,Device type_Computer,Device type_Mobile phone,...,Operating system_iOS,Sharing service_Copy to Clipboard,Sharing service_Facebook,Sharing service_Gmail,Sharing service_LinkedIn,Sharing service_Other,Sharing service_Share to WhatsApp Business,Sharing service_Text Message,Sharing service_Twitter,Sharing service_WhatsApp
0,2020-06-28,0,0,0,0,0,0,1,1,0,...,0,0,0,0,0,0,0,0,0,0
1,2020-06-29,0,0,0,0,0,0,72,49,23,...,0,1,0,0,0,0,0,0,0,0
2,2020-06-30,0,0,0,0,0,0,76,62,14,...,0,0,0,0,0,0,0,0,0,0
3,2020-07-01,0,0,0,0,0,0,70,46,24,...,0,0,0,0,0,0,0,0,0,0
4,2020-07-02,0,0,0,0,0,0,57,29,28,...,0,0,0,0,0,0,0,0,0,0


In [21]:
# Inspect the last rows of the merged frame as well.
merged_df.tail()

Unnamed: 0,Date,Cities_0x164b85cef5ab402d:0x8467b6b037a24d49,Cities_0x168e8fde9837cabf:0x191f55de7e67db40,Cities_0x3397ba0942ef7375:0x4a9a32d9fe083d40,Cities_0x3568eb6de823cd35:0x35d8cb74247108a7,Cities_0x487a4d4c5226f5db:0xd9be143804fe6baa,Content type_Other,Content type_Videos,Device type_Computer,Device type_Mobile phone,...,Operating system_iOS,Sharing service_Copy to Clipboard,Sharing service_Facebook,Sharing service_Gmail,Sharing service_LinkedIn,Sharing service_Other,Sharing service_Share to WhatsApp Business,Sharing service_Text Message,Sharing service_Twitter,Sharing service_WhatsApp
1274,2023-12-24,0,0,0,0,0,0,36,17,19,...,11,0,0,0,0,0,0,0,0,0
1275,2023-12-25,0,0,0,0,0,0,33,17,16,...,1,0,0,0,0,0,0,0,0,0
1276,2023-12-26,15,0,0,0,0,0,57,33,21,...,2,0,0,0,0,0,0,0,0,0
1277,2023-12-27,0,0,0,0,0,0,8,4,4,...,0,0,0,0,0,0,0,0,0,0
1278,2023-12-28,0,0,0,0,0,0,25,24,1,...,0,0,0,0,0,0,0,0,0,0


In [ ]:
import os

from sqlalchemy import MetaData, Table, create_engine, inspect
from sqlalchemy.schema import CreateTable

# Destination table (placeholder name — replace with the real table name).
table_name = 'table_name'

# Take the connection string from the DATABASE_URL environment variable so that
# real credentials never have to be written into the notebook. The fallback is
# the original local placeholder string and should only be used for local testing.
database_url = os.environ.get(
    'DATABASE_URL',
    'postgresql://localhost:postgres@localhost:15432/dbname',
)
engine = create_engine(database_url)

# Write the DataFrame to a table in the SQL database, replacing any existing
# table of the same name. The DataFrame index is written as a column as well
# (pandas default).
merged_df.to_sql(table_name, engine, if_exists='replace')

# SQLAlchemy's Inspector has no get_create_table() method. Instead, reflect the
# freshly written table through the Inspector and render its CREATE TABLE DDL
# for the engine's dialect.
inspector = inspect(engine)
reflected_table = Table(table_name, MetaData())
inspector.reflect_table(reflected_table, None)
print(CreateTable(reflected_table).compile(engine))