In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from scipy import stats

In [None]:
# Read the user-behaviour CSV into a DataFrame indexed by its 'User ID' column
df = pd.read_csv(
    'user_behavior_dataset.csv',
    index_col='User ID',
)
# Preview the first rows as the cell output
df.head()

In [None]:
# Dataset dimensions as a (rows, columns) tuple
df.shape

In [None]:
# Summary statistics from pandas' describe(); for a frame with mixed dtypes
# this covers the numeric columns by default
df.describe()

In [None]:
# Count the missing values in every column
df.isna().sum()

In [None]:
# Number of rows that repeat an earlier row exactly (the first occurrence is not counted)
df.duplicated(keep='first').sum()

In [None]:
# Keep only the numeric columns of the dataset
numeric_df = df.select_dtypes(include='number')
numeric_df

In [None]:
# Summary table of the main moments: mean, median, standard deviation, skewness, kurtosis
def calculate_statistics(df):
    """Build a per-column table of descriptive statistics.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are all numeric.

    Returns
    -------
    pandas.DataFrame
        One row per input column, with the columns 'Mean', 'Median',
        'Standard Deviation', 'Skewness' and 'Kurtosis'.

    Notes
    -----
    Skewness and kurtosis come from ``scipy.stats.skew`` and
    ``scipy.stats.kurtosis`` called with their default settings. Missing
    values are dropped from each column before those two calls so they are
    treated the same way as in pandas' ``mean``/``median``/``std``, which
    skip NaN; otherwise a single NaN would make the skewness and kurtosis of
    that column NaN while the other statistics stay valid.
    """
    def _skewness(column):
        # Drop NaN first: scipy propagates NaN by default
        return stats.skew(column.dropna())

    def _kurtosis(column):
        # Drop NaN first: scipy propagates NaN by default
        return stats.kurtosis(column.dropna())

    major_moments = pd.DataFrame({
        'Mean': df.mean(),
        'Median': df.median(),
        'Standard Deviation': df.std(),
        'Skewness': df.apply(_skewness),
        'Kurtosis': df.apply(_kurtosis),
    })
    return major_moments

In [None]:
# Store the table under its own name: assigning it to `stats` would rebind the
# name imported with `from scipy import stats`, so later `stats.skew(...)` /
# `stats.kurtosis(...)` calls would no longer reach scipy.
summary_stats = calculate_statistics(numeric_df)
summary_stats

In [None]:
# Count users per device model, then take the labels from that same Series so
# label i always belongs to count i. (`value_counts()` orders by frequency,
# whereas `unique()` keeps the order of first appearance, so labels built with
# `unique()` would not line up with the counts.)
model_counts = df['Device Model'].value_counts()
models = model_counts.index.to_numpy()

In [None]:
# Function to plot pie chart
def plot_models_pie(model_counts, labels = None):
    """Draw a pie chart of how many users use each device model.

    Parameters
    ----------
    model_counts : pandas.Series or sequence of numbers
        Number of users per device model; one slice is drawn per entry.
    labels : sequence of str, optional
        Slice labels, in the same order as ``model_counts``. When omitted,
        the index of ``model_counts`` is used (if it has one) so every slice
        is labelled with the model its count belongs to.
    """
    if labels is None:
        # Take the labels from the counts themselves; a separately built list
        # can be in a different order and mislabel the slices.
        labels = getattr(model_counts, 'index', None)
    _, ax = plt.subplots(dpi = 144, figsize = (3, 3))
    ax.pie(model_counts, labels = labels, autopct = '%1.1f%%', startangle = 90)
    # Title of the pie chart
    ax.set_title('Mobile Models Used by Users', fontsize = 10)
    # Equal aspect ratio keeps the pie circular
    ax.axis('equal')
    plt.show()

In [None]:
# Label the slices from the index of `model_counts` so each label matches its count
plot_models_pie(model_counts, labels = model_counts.index)

In [None]:
# Daily screen-on time and data usage, pulled out as separate Series
screen_on_time = df['Screen On Time (hours/day)']
data_usage = df['Data Usage (MB/day)']

In [None]:
# Re-import so that `stats` is guaranteed to be scipy.stats in this cell,
# even if the name was reassigned earlier in the session
from scipy import stats

# Age column and its descriptive statistics
age = df['Age']
mean_age = age.mean()
median_age = age.median()
# mode() can return several values; keep the first one
mode_age = age.mode()[0]
skewness_age = stats.skew(age)
kurtosis_age = stats.kurtosis(age)

In [None]:
# Plotting Histogram
def plot_hist(age):
    """Plot a histogram of ages annotated with its summary statistics.

    The mean, median, mode, skewness and kurtosis are computed from ``age``
    itself, so the annotations always describe the data that is drawn.

    Parameters
    ----------
    age : pandas.Series or array-like
        Ages to plot.
    """
    # Accept plain lists/arrays as well as Series
    values = pd.Series(age)
    mean_age = values.mean()
    median_age = values.median()
    # mode() can return several values; use the first one
    mode_age = values.mode().iloc[0]
    skewness_age = stats.skew(values)
    kurtosis_age = stats.kurtosis(values)

    _, ax = plt.subplots(dpi = 144, figsize = (6, 4))
    ax.hist(values, bins = 10, color = 'purple', edgecolor = 'black', alpha = 0.7)
    # Plot mean, median, and mode as vertical lines
    ax.axvline(mean_age, color = 'red', linestyle = '--', linewidth = 2, label = f'Mean: {mean_age:.2f}')
    ax.axvline(median_age, color = 'green', linestyle = '--', linewidth = 2, label = f'Median: {median_age:.2f}')
    ax.axvline(mode_age, color = 'blue', linestyle = '--', linewidth = 2, label = f'Mode: {mode_age}')
    # Skewness and kurtosis text in the bottom-right corner (axes coordinates)
    ax.text(0.95, 0.05, f'Skewness: {skewness_age:.2f}',
            transform = ax.transAxes, ha = 'right', va = 'bottom', fontsize = 10, color = 'black')
    ax.text(0.95, 0.10, f'Kurtosis: {kurtosis_age:.2f}',
            transform = ax.transAxes, ha = 'right', va = 'bottom', fontsize = 10, color = 'black')
    # Add labels and title
    ax.set_xlabel('Age')
    ax.set_ylabel('Frequency')
    ax.set_title('Age Distribution', fontsize = 10)
    ax.legend()
    plt.show()

In [None]:
# Draw the age histogram
plot_hist(age = age)

In [None]:
# Daily battery drain and app-usage time, pulled out as separate Series
battery_drain = df['Battery Drain (mAh/day)']
app_usage = df['App Usage Time (min/day)']
# Show the battery-drain values as the cell output
battery_drain

In [None]:
# Function to plot scatter plot
def app_usage_vs_battery_drain_scatter(app_usage, battery_drain):
    """Scatter app usage (y-axis) against battery drain (x-axis).

    Parameters
    ----------
    app_usage : array-like
        Values plotted on the y-axis.
    battery_drain : array-like
        Values plotted on the x-axis; must have the same length as
        ``app_usage``.
    """
    fig, ax = plt.subplots(dpi = 144, figsize = (4, 4))
    ax.scatter(battery_drain, app_usage, alpha = 0.7, color = 'purple')
    # Title and axis labels
    ax.set_title('Battery Drain vs App Usage', fontsize = 10)
    ax.set_xlabel('Battery Drain')
    ax.set_ylabel('App Usage')
    ax.grid(True)
    fig.tight_layout()
    plt.show()

In [None]:
 app_usage_vs_battery_drain_scatter(app_usage = app_usage, battery_drain = battery_drain)

In [None]:
# Mean daily battery drain (mAh/day) for each device model
avg_drain_time = (
    df.groupby('Device Model')['Battery Drain (mAh/day)']
      .mean()
)
avg_drain_time

In [None]:
# Plotting bar graph
def plot_avg_drain_time_by_mobile_type_matplotlib(avg_drain_time):
    """Bar chart of per-mobile-type averages with a dashed line at their mean.

    Parameters
    ----------
    avg_drain_time : pandas.Series
        One value per mobile type, indexed by the mobile-type label; the
        index supplies the bar positions and the values the bar heights.
    """
    fig, ax = plt.subplots(dpi = 144, figsize = (8, 4))
    ax.bar(avg_drain_time.index, avg_drain_time.values, color = 'purple')
    # Mean of the plotted bar heights (an average of the per-type averages)
    mean_drain_time = avg_drain_time.mean()
    # Dashed horizontal reference line at that mean
    ax.axhline(y = mean_drain_time, color = 'red', linestyle = '--', linewidth = 2)
    # Hand-placed caption for the line: x is (number of bars - 3) and y sits
    # 12 data units above the line
    ax.text(len(avg_drain_time) - 3, mean_drain_time + 12, f'Mean: {mean_drain_time:.2f}',
            color = 'red', fontsize = 10, ha = 'left', va = 'bottom')
    # Title and axis labels
    ax.set_title('Average drain time by Mobile Type', fontsize = 10)
    ax.set_xlabel('Mobile Type')
    ax.set_ylabel('Average Drain Time(mAh/day)')
    ax.grid(axis = 'y')
    fig.tight_layout()
    plt.show()

In [None]:
# Draw the bar chart of average battery drain per device model
plot_avg_drain_time_by_mobile_type_matplotlib(avg_drain_time = avg_drain_time)

In [None]:
# Pairwise Pearson correlation between the numeric columns
numeric_df.corr(method = 'pearson')

In [None]:
# Pairwise Kendall rank correlation between the numeric columns
numeric_df.corr('kendall')

In [None]:
# Plotting heatmaps
def plot_mobile_correlation(numeric_df, method):
    """Draw the lower triangle of a correlation matrix as an annotated heatmap.

    Parameters
    ----------
    numeric_df : pandas.DataFrame
        Numeric columns to correlate.
    method : str
        Correlation method passed to ``DataFrame.corr`` (for example
        'pearson' or 'kendall'); it is also shown, capitalised, in the title.
    """
    # Compute the matrix once and reuse it for both the mask and the heatmap
    corr = numeric_df.corr(method = method)
    # Hide the upper triangle (a mirror of the lower one) and the diagonal
    mask = np.triu(np.ones_like(corr, dtype = bool))
    fig, ax = plt.subplots(dpi = 144, figsize = (8, 4))
    sns.heatmap(corr, ax = ax, vmin = -1, vmax = 1,
                cmap = 'RdBu', annot = True, mask = mask)
    # Rotate the x tick labels for readability
    plt.xticks(rotation = 45, ha = 'right')
    # Title of the heatmap
    ax.set_title(f" Heatmap Using {method.capitalize()} Correlation Matrix", fontsize = 10)
    plt.show()

In [None]:
# One heatmap per correlation method
plot_mobile_correlation(numeric_df, method = 'pearson')
plot_mobile_correlation(numeric_df, method = 'kendall')