In [131]:
import os
import sys
from urllib.parse import quote_plus

from dotenv import load_dotenv
from sqlalchemy import create_engine

# Load environment variables from .env file
load_dotenv()

# Read the PostgreSQL connection settings from the environment.
DBNAME = os.getenv('DBNAME')
DBUSER = os.getenv('DBUSER')
DBPASSWORD = os.getenv('DBPASSWORD')
DBHOST = os.getenv('DBHOST')
DBPORT = os.getenv('DBPORT')

# Fail fast with a readable message when a setting is unset: os.getenv returns
# None in that case, which would otherwise end up in the URI as the text "None".
_missing_settings = [
    name
    for name, value in (
        ('DBNAME', DBNAME),
        ('DBUSER', DBUSER),
        ('DBPASSWORD', DBPASSWORD),
        ('DBHOST', DBHOST),
        ('DBPORT', DBPORT),
    )
    if value is None
]
if _missing_settings:
    raise RuntimeError(
        "Missing database settings in the environment/.env file: "
        + ", ".join(_missing_settings)
    )

# Build the database URI and engine. The user name and password are
# percent-encoded so that reserved URL characters in the credentials
# (e.g. '@', ':', '/', '#') cannot corrupt the URI; for credentials without
# such characters the resulting string is the same as plain interpolation.
DATABASE_URI = (
    'postgresql+psycopg2://'
    f'{quote_plus(DBUSER)}:{quote_plus(DBPASSWORD)}'
    f'@{DBHOST}:{DBPORT}/{DBNAME}'
)
engine = create_engine(DATABASE_URI)

# Test the connection: opening (and immediately closing) a connection is
# enough to prove the settings work. A failure is reported, not re-raised.
try:
    with engine.connect() as connection:
        print("Connection to the database was successful!")
except Exception as e:
    print(f"Connection failed: {e}")

Connection to the database was successful!


In [143]:
import pandas as pd

# Define a function to load data from a specific table
def load_table(table_name):
    """Read every row and column of one database table into a DataFrame.

    Args:
        table_name: Name of the table to select from. It is inserted into the
            SQL text verbatim (no quoting or escaping), so pass only trusted
            identifiers.

    Returns:
        Whatever ``pd.read_sql`` returns for ``SELECT *`` on that table,
        executed through the module-level ``engine``.
    """
    select_all_sql = "SELECT * FROM {};".format(table_name)
    table_frame = pd.read_sql(select_all_sql, engine)
    return table_frame

# Pull the complete `xdr_data` table into a DataFrame.
df_xdr = load_table(table_name='xdr_data')

# Preview the leading rows (head() defaults to the first five).
print(df_xdr.head(n=5))


      Bearer Id            Start  Start ms              End  End ms  \
0  1.311448e+19   4/4/2019 12:01     770.0  4/25/2019 14:35   662.0   
1  1.311448e+19   4/9/2019 13:04     235.0   4/25/2019 8:15   606.0   
2  1.311448e+19   4/9/2019 17:42       1.0  4/25/2019 11:58   652.0   
3  1.311448e+19   4/10/2019 0:31     486.0   4/25/2019 7:36   171.0   
4  1.311448e+19  4/12/2019 20:10     565.0  4/25/2019 10:40   954.0   

   Dur. (ms)          IMSI  MSISDN/Number          IMEI  \
0  1823652.0  2.082014e+14   3.366496e+10  3.552121e+13   
1  1365104.0  2.082019e+14   3.368185e+10  3.579401e+13   
2  1361762.0  2.082003e+14   3.376063e+10  3.528151e+13   
3  1321509.0  2.082014e+14   3.375034e+10  3.535661e+13   
4  1089009.0  2.082014e+14   3.369980e+10  3.540701e+13   

      Last Location Name  ...  Youtube DL (Bytes)  Youtube UL (Bytes)  \
0  9.16456699548519E+015  ...          15854611.0           2501332.0   
1                L77566A  ...          20247395.0          19111729.0   

In [149]:
# Tally rows per 'Handset Type' and keep the ten most frequent values.
# NOTE: placeholder labels such as 'undefined' are counted like any other
# handset type and can therefore appear in this ranking.
top_handsets = df_xdr['Handset Type'].value_counts().iloc[:10]
print("\nTop 10 Handsets:", top_handsets, sep="\n")



Top 10 Handsets:
Handset Type
Huawei B528S-23A                59256
Apple iPhone 6S (A1688)         28257
Apple iPhone 6 (A1586)          27069
undefined                       26961
Apple iPhone 7 (A1778)          18978
Apple iPhone Se (A1723)         15561
Apple iPhone 8 (A1905)          14979
Apple iPhone Xr (A2105)         13704
Samsung Galaxy S8 (Sm-G950F)    13560
Apple iPhone X (A1901)          11439
Name: count, dtype: int64


In [150]:
# Tally rows per 'Handset Manufacturer' and keep the three most frequent values.
top_manufacturers = df_xdr['Handset Manufacturer'].value_counts().iloc[:3]
print("\nTop 3 Manufacturers:", top_manufacturers, sep="\n")



Top 3 Manufacturers:
Handset Manufacturer
Apple      178695
Samsung    122517
Huawei     103269
Name: count, dtype: int64
