Process Customer LSTM Files and Save to SQLite

In [1]:
import pandas as pd
import glob
import sqlite3
import numpy as np

# Function to read and process LSTM Customer CSV files with cluster information
def process_lstm_files(file_pattern):
    """Load every CSV matching ``file_pattern`` into one cluster-labelled DataFrame.

    Each file gets a ``Cluster`` column set to '1', '2' or '3' depending on
    whether its path contains ``Cluster1``, ``Cluster2`` or ``Cluster3``
    (checked in that order), or 'Unknown' if none of them occurs. ``Revenue``
    is set to NaN on rows where it is exactly equal to ``Predict``. The summed
    ``Predict`` value of clusters 1-3 is printed as a sanity check.

    Parameters
    ----------
    file_pattern : str
        Glob pattern selecting the CSV files. Every matched file must contain
        ``Revenue`` and ``Predict`` columns.

    Returns
    -------
    pandas.DataFrame
        The rows of all matched files, concatenated in sorted path order with
        a fresh 0..n-1 index, plus the ``Cluster`` column.

    Raises
    ------
    ValueError
        If no file matches ``file_pattern``.
    """
    # glob yields paths in arbitrary, OS-dependent order; sorting keeps the row
    # order of the result (and of anything persisted from it) reproducible.
    csv_files = sorted(glob.glob(file_pattern))
    if not csv_files:
        # Fail with an actionable message instead of pandas' generic
        # "No objects to concatenate" (same exception type as before).
        raise ValueError(f"No CSV files match pattern: {file_pattern!r}")

    all_data = []
    for file in csv_files:
        # NOTE(review): the table preview in this notebook shows an
        # 'Unnamed: 0' column, so the CSVs presumably carry a saved index
        # column. It is kept as-is to leave the output schema unchanged --
        # confirm whether it should be dropped.
        df = pd.read_csv(file)
        df['Cluster'] = next(
            (label for label in ('1', '2', '3') if f'Cluster{label}' in file),
            'Unknown',
        )
        # Remove Revenue if it equals Predict (presumably forecast rows that
        # merely echo the prediction -- TODO confirm with the CSV producer).
        df.loc[df['Revenue'] == df['Predict'], 'Revenue'] = np.nan
        all_data.append(df)

    combined_df = pd.concat(all_data, ignore_index=True)
    total_predict_per_cluster = combined_df.groupby('Cluster')['Predict'].sum()
    for cluster in ['1', '2', '3']:
        print(f"Total Predict Value for Cluster {cluster}: {total_predict_per_cluster.get(cluster, 'No Data')}")

    return combined_df

# Build the combined, cluster-labelled LSTM frame from the Option4 CSV files
# (persisting it to SQLite happens in a later step)
combined_lstm_df = process_lstm_files(file_pattern='data/LSTM/New/Option4/*.csv')

Total Predict Value for Cluster 1: 1146484.342
Total Predict Value for Cluster 2: 61263295.7
Total Predict Value for Cluster 3: 2830418.276


Save LSTM Predictions to SQLite

In [2]:
# Function to save DataFrame to SQLite database
def save_to_sqlite(df, db_name, table_name):
    """Write ``df`` to a SQLite table, replacing any existing table, and print a preview.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to persist; the DataFrame index is not written.
    db_name : str
        Database passed to ``sqlite3.connect`` (created if it does not exist).
    table_name : str
        Name of the table to (re)create. Any existing table of that name is
        dropped first.

    Returns
    -------
    None
        The first five rows of the stored table are printed.
    """
    # Double-quote the identifier (doubling embedded quotes) so that names
    # containing spaces, punctuation or SQL keywords are valid in raw SQL.
    quoted_table = '"' + table_name.replace('"', '""') + '"'

    conn = sqlite3.connect(db_name)
    try:
        # The sqlite3 connection context manager only commits / rolls back the
        # transaction; it does not close the connection, hence the finally.
        with conn:
            conn.execute(f"DROP TABLE IF EXISTS {quoted_table}")
            df.to_sql(table_name, conn, if_exists='append', index=False)
        df_preview = pd.read_sql_query(f"SELECT * FROM {quoted_table} LIMIT 5", conn)
    finally:
        conn.close()

    print(f"\nPreview of table '{table_name}':\n", df_preview)

# Persist the combined LSTM frame as table 'predictions_LSTM'
save_to_sqlite(
    df=combined_lstm_df,
    db_name='predictions_LSTM.db',
    table_name='predictions_LSTM',
)



Preview of table 'predictions_LSTM':
    Unnamed: 0        Date     Revenue Train  Test Predict Cluster
0           0  2021-12-27   6550.0405  None  None    None       1
1           1  2022-01-03  21021.4300  None  None    None       1
2           2  2022-01-10  26732.9860  None  None    None       1
3           3  2022-01-17  29902.7830  None  None    None       1
4           4  2022-01-24  23850.3100  None  None    None       1


Process Customer Clusters

In [3]:
# Load customer-to-cluster assignments and relabel clusters as 1..3
def process_customer_clusters(csv_file):
    """Read a customer cluster-assignment CSV and shift labels 0/1/2 to 1/2/3.

    The file's own header row is discarded and its columns are renamed to
    ``ID`` and ``cluster``. The number of rows in each of clusters 1-3 is
    printed.

    Parameters
    ----------
    csv_file : str
        Path of the CSV file to read.

    Returns
    -------
    pandas.DataFrame
        Columns ``ID`` and ``cluster``, with cluster values 0, 1, 2 replaced
        by 1, 2, 3 (any other value is left untouched).
    """
    assignments = pd.read_csv(csv_file, header=0, names=['ID', 'cluster'])

    # A dict passed to replace() is applied in one pass, so 0 -> 1 does not
    # cascade into 1 -> 2.
    relabel = {0: 1, 1: 2, 2: 3}
    assignments['cluster'] = assignments['cluster'].replace(relabel)

    tally = assignments['cluster'].value_counts()
    for label in (1, 2, 3):
        print(f"Cluster {label} count: {tally.get(label, 0)}")

    return assignments

# Load the customer-to-cluster assignments (labels shifted to 1..3)
customer_clusters_df = process_customer_clusters(csv_file='data/clusterAssignments.csv')


Cluster 1 count: 489
Cluster 2 count: 10672
Cluster 3 count: 33


Save Customer Clusters to SQLite

In [4]:
# Persist the cluster assignments as table 'customer_cluster' in the same database file
save_to_sqlite(
    df=customer_clusters_df,
    db_name='predictions_LSTM.db',
    table_name='customer_cluster',
)



Preview of table 'customer_cluster':
          ID  cluster
0  10001245        2
1  10001255        2
2  10003830        2
3  10003951        2
4  10004155        2
