In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Report the dimensions of the frame as returned by `DataFrame.shape`.
frame_shape = df.shape
print("Shape:", frame_shape)

In [None]:
# List every column label as a plain Python list for a compact printout.
column_names = df.columns.tolist()
print("Columns:", column_names)

In [None]:
# Count missing values in every column. `sum` has to be *called*: passing the
# bare attribute `df.isnull().sum` prints the bound-method repr instead of
# the per-column counts.
missing_per_column = df.isnull().sum()
print("Missingvalues")
# Printed separately so the first row of the Series stays aligned with the rest.
print(missing_per_column)

In [None]:
# Rank columns by their number of null entries (largest first) and show
# only the first ten rows of that ranking.
null_counts = df.isnull().sum()
worst_ten = null_counts.sort_values(ascending=False).head(10)
print("Missing values (top 10):\n", worst_ten)

In [None]:
# Columns treated as targets by this cell and by the cells that follow.
Targets = ['DL_bitrate', 'UL_bitrate', 'PINGAVG']

print("Summery Statistics for Throughput:\n")

# Descriptive statistics, extended with the 1st and 99th percentiles so the
# tails are visible next to the quartiles.
summary_percentiles = [0.01, 0.25, 0.5, 0.75, 0.99]
target_summary = df[Targets].describe(percentiles=summary_percentiles)
print(target_summary)

In [None]:
print("\n% of Zero Values:")

# Per target column: number of cells equal to 0, expressed as a percentage
# of the total row count (rows with missing values are still in the denominator).
zero_counts = (df[Targets] == 0).sum()
zero_pct = zero_counts / len(df) * 100
print(zero_pct)

In [None]:
# One histogram (with a KDE overlay) per target column, laid out side by side.
fig, axes = plt.subplots(1, 3, figsize=(18,5))
for panel, col in zip(axes, Targets):
    # Drop missing values before plotting the column.
    observed = df[col].dropna()
    sns.histplot(observed, bins=50, kde=True, ax=panel)
    panel.set_title(f"Distribution of {col}")
plt.show()

In [None]:
# One horizontal box plot per target column, side by side, to expose outliers.
fig, axes = plt.subplots(1, 3, figsize=(18,5))
for panel, col in zip(axes, Targets):
    sns.boxplot(x=df[col], ax=panel)
    panel.set_title(f"Outliers in {col}")
plt.show()

In [None]:
# Restrict to float64/int64 columns; other dtypes are left out of the
# correlation matrix.
numeric_frame = df.select_dtypes(include=['float64','int64'])
num_cols = numeric_frame.columns
corr_matrix = numeric_frame.corr()

print("Correlation with Targets")

# Keep only the target columns of the matrix, order the rows by their
# correlation with DL_bitrate (highest first) and show the first ten.
target_corr = corr_matrix[Targets]
ranked_by_dl = target_corr.sort_values(by="DL_bitrate", ascending=False)
print(ranked_by_dl.head(10))

In [None]:
# Correlation matrix over the float64/int64 columns only.
numeric_frame = df.select_dtypes(include=['float64','int64'])
num_cols = numeric_frame.columns
corr = numeric_frame.corr()

# Rows: every numeric column, ordered by correlation with DL_bitrate
# (highest first). Columns: the three targets.
heatmap_data = corr[Targets].sort_values(by='DL_bitrate', ascending=False)

plt.figure(figsize=(12,8))
sns.heatmap(heatmap_data, annot=True, cmap="coolwarm", cbar=True)
plt.title("Correlation of Features with Targets (Throughput & Latency)")
plt.show()

In [None]:
features_to_check = ['Level','SNR','CQI','Qual','Speed','BANDWIDTH']

# Candidate features that are missing from the frame are skipped silently.
available_features = [name for name in features_to_check if name in df.columns]

# One scatter plot per (feature, target) pair, each in its own figure.
for col in available_features:
    for target in Targets:
        plt.figure(figsize=(6,4))
        scatter_ax = sns.scatterplot(data=df, x=col, y=target, alpha=0.3)
        scatter_ax.set_title(f"{target} vs {col}")
        plt.show()

In [None]:
# Only drawn when the frame actually carries an 'ElapsedTime' column.
if 'ElapsedTime' in df.columns:
    # (column to plot, legend label) for each of the three series, which
    # all share a single y-axis.
    time_series = (
        ('DL_bitrate', 'Downlink'),
        ('UL_bitrate', 'Uplink'),
        ('PINGAVG', 'Latency'),
    )

    plt.figure(figsize=(12,6))
    for y_col, legend_label in time_series:
        sns.lineplot(data=df, x='ElapsedTime', y=y_col, label=legend_label)
    plt.title("Throughput & Latency Over Elapsed Time")
    plt.legend()
    plt.show()


In [None]:
# Both coordinate columns must be present before anything is drawn.
if {'Longitude', 'Latitude'}.issubset(df.columns):
    # (column used for colour, palette, figure title) for each map.
    geo_views = (
        ('DL_bitrate', 'viridis', "Geospatial Distribution of Downlink Throughput"),
        ('PINGAVG', 'coolwarm', "Geospatial Distribution of Latency"),
    )

    for hue_col, palette_name, heading in geo_views:
        plt.figure(figsize=(10,7))
        sns.scatterplot(data=df, x='Longitude', y='Latitude',
                        hue=hue_col, palette=palette_name, alpha=0.5)
        plt.title(heading)
        plt.show()

In [None]:
# Box plots grouped by 'Operatorname'; skipped when that column is absent.
if 'Operatorname' in df.columns:
    # (column on the y-axis, figure title) for each plot.
    operator_views = (
        ('DL_bitrate', "Downlink Throughput by Operator"),
        ('PINGAVG', "Latency by Operator"),
    )

    for y_col, heading in operator_views:
        plt.figure(figsize=(10,6))
        sns.boxplot(data=df, x='Operatorname', y=y_col)
        plt.title(heading)
        plt.show()


In [None]:
# Box plots grouped by 'NetworkTech'; skipped when that column is absent.
if 'NetworkTech' in df.columns:
    # (column on the y-axis, figure title) for each plot.
    tech_views = (
        ('DL_bitrate', "Downlink Throughput by Network Tech"),
        ('PINGAVG', "Latency by Network Tech"),
    )

    for y_col, heading in tech_views:
        plt.figure(figsize=(10,6))
        sns.boxplot(data=df, x='NetworkTech', y=y_col)
        plt.title(heading)
        plt.show()


In [None]:

# Box plots grouped by 'Mobility'; skipped when that column is absent.
if 'Mobility' in df.columns:
    # (column on the y-axis, figure title) for each plot.
    mobility_views = (
        ('DL_bitrate', "Downlink Throughput by Mobility"),
        ('PINGAVG', "Latency by Mobility"),
    )

    for y_col, heading in mobility_views:
        plt.figure(figsize=(10,6))
        sns.boxplot(data=df, x='Mobility', y=y_col)
        plt.title(heading)
        plt.show()

In [None]:
# (grouping column, heading printed above its table). Each grouping is
# reported independently and only when its column exists in the frame.
grouped_reports = (
    ('NetworkTech', "Avg Throughput & Latency by NetworkTech"),
    ('Operatorname', "Avg Throughput & Latency by Operator"),
    ('Mobility', "Avg Throughput & Latency by Mobility "),
)

for group_col, heading in grouped_reports:
    if group_col not in df.columns:
        continue
    print(heading)
    # Mean of every target column per group, rounded to two decimals.
    group_means = df.groupby(group_col)[Targets].mean()
    print(group_means.round(2))