In [None]:
from dotenv import load_dotenv
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:
# Make the project's helper modules importable by adding ../scripts to the
# module search path. The relative path is resolved against the current
# working directory.
# NOTE(review): assumes the kernel is started from the notebooks/ folder — confirm.
import os
import sys

SCRIPTS_DIR = os.path.abspath('../scripts')
# Guard so that re-running this cell does not append the same entry again.
if SCRIPTS_DIR not in sys.path:
    sys.path.append(SCRIPTS_DIR)

In [None]:
from load_data import load_data_from_postgres, load_data_using_sqlalchemy
from sql_queries import execute_telecom_queries


In [None]:
# Pull the variables defined in the .env file into the process environment.
load_dotenv()

# Database connection settings, read from the environment.
# Each lookup yields None when the variable is not set.
DB_HOST = os.environ.get("DB_HOST")
DB_PORT = os.environ.get("DB_PORT")
DB_NAME = os.environ.get("DB_NAME")
DB_USER = os.environ.get("DB_USER")
DB_PASSWORD = os.environ.get("DB_PASSWORD")


In [None]:
# Fetch every row of the xdr_data table through the SQLAlchemy-based loader.
query = "SELECT * FROM xdr_data;"
df = load_data_using_sqlalchemy(query)

# Report the outcome; a None result from the loader is treated as a failure.
print("Successfully loaded the data" if df is not None else "Failed to load data.")

In [None]:
# Status line for the reader: df was loaded with load_data_using_sqlalchemy.
# NOTE(review): "prefered" in the message is a typo for "preferred"; the
# runtime string is left unchanged here.
print("using the prefered method(SQLAlchemy)")
# Bare last expression: the notebook renders df with its rich display.
df

In [None]:
# Number of null entries in each column.
missing_values = df.isnull().sum()

# The same counts expressed as a percentage of the row count. Reuses the
# counts above instead of scanning the whole frame for nulls a second time.
missing_percentage = (missing_values / len(df)) * 100
print(missing_percentage)

In [None]:
# Handle missing values in df using the project's user_overview_EDA helper.

import user_overview_EDA as uoe
# The four columns below are passed as unique_identifiers; how the helper
# treats them is defined in user_overview_EDA and is not visible here.
# NOTE(review): the return value is not assigned, so later cells only see the
# cleaned data if the helper modifies df in place — confirm.
uoe.handle_missing_values(df, unique_identifiers=['Bearer Id', 'IMSI', 'MSISDN/Number', 'IMEI'])



In [None]:
# Apply the project's outlier treatment to df. As the cell's last expression,
# whatever the call returns is what the notebook displays.
# NOTE(review): the result is not stored here, and the same call is repeated
# in the following cell where its result is kept as `data` — confirm whether
# treat_outliers changes df in place or only returns the treated data.
uoe.treat_outliers(df)

In [None]:
# Aggregate engagement metrics per customer.
import User_Experiance as uex
# treat_outliers is called on df again here; this time its result is kept as
# `data` and handed to the per-customer aggregation.
data = uoe.treat_outliers(df)
# Last expression: the aggregation's return value is what the cell displays.
uex.aggregate_per_customer(data)

In [None]:
import User_Experiance as uex

# Maps each source column name to a short alias, covering the TCP
# retransmission, RTT and throughput metrics (downlink and uplink of each).
columns_of_interest = {
    # TCP retransmission volume
    'TCP DL Retrans. Vol (Bytes)': 'TCP_DL',
    'TCP UL Retrans. Vol (Bytes)': 'TCP_UL',
    # Average round-trip time
    'Avg RTT DL (ms)': 'RTT_DL',
    'Avg RTT UL (ms)': 'RTT_UL',
    # Average bearer throughput
    'Avg Bearer TP DL (kbps)': 'Throughput_DL',
    'Avg Bearer TP UL (kbps)': 'Throughput_UL',
}

# Compute and show the top, bottom and most frequent values for those metrics.
# top_n=10 presumably limits each listing to ten entries — confirm in
# User_Experiance.compute_and_display_stats.
uex.compute_and_display_stats(
    df,
    columns_of_interest,
    top_n=10,
)

In [None]:
# Bring the per-handset distribution helper into scope from the project module.
from User_Experiance import distribution_per_handset

# Distribution of average throughput per handset type — downlink.
# The number in the title must agree with top_n (both 15), matching the
# uplink and TCP retransmission cells that follow.
# NOTE(review): 'Throughput_DL' is the short alias listed in
# columns_of_interest, not the raw column name 'Avg Bearer TP DL (kbps)';
# this assumes an earlier step has renamed df's columns — confirm.
throughput_dl = distribution_per_handset(
    df,
    'Throughput_DL',
    'Handset Type',
    'Top 15 Handsets by Average Downlink Throughput',
    'Average Downlink Throughput (kbps)',
    top_n=15,
)

In [None]:
# Average uplink throughput per handset type, for 15 handsets (top_n=15).
# Positional arguments appear to be: frame, metric column, grouping column,
# plot title, axis label — confirm against distribution_per_handset.
# NOTE(review): 'Throughput_UL' is the short alias from columns_of_interest;
# assumes df's columns were renamed by an earlier step — confirm.
throughput_ul = distribution_per_handset(
    df, 'Throughput_UL', 'Handset Type',
    'Top 15 Handsets by Average Uplink Throughput',
    'Average Uplink Throughput (kbps)',
    top_n=15,
)

In [None]:
# Average TCP retransmission per handset type — downlink, for 15 handsets
# (top_n=15).
# Positional arguments appear to be: frame, metric column, grouping column,
# plot title, axis label — confirm against distribution_per_handset.
# NOTE(review): 'TCP_DL' is the short alias from columns_of_interest;
# assumes df's columns were renamed by an earlier step — confirm.
tcp_dl_retrans = distribution_per_handset(
    df, 'TCP_DL', 'Handset Type',
    'Top 15 Handsets by Average Downlink TCP Retransmission',
    'Average Downlink TCP Retransmission (Bytes)',
    top_n=15,
)

In [None]:
# Average TCP retransmission per handset type — uplink, for 15 handsets
# (top_n=15).
# Positional arguments appear to be: frame, metric column, grouping column,
# plot title, axis label — confirm against distribution_per_handset.
# NOTE(review): 'TCP_UL' is the short alias from columns_of_interest;
# assumes df's columns were renamed by an earlier step — confirm.
tcp_ul_retrans = distribution_per_handset(
    df, 'TCP_UL', 'Handset Type',
    'Top 15 Handsets by Average Uplink TCP Retransmission',
    'Average Uplink TCP Retransmission (Bytes)',
    top_n=15,
)