<h3><b>Load and Merge Datasets</b></h3>

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from math import sqrt

# Read the three source tables in order. Roles as labelled by the original author:
#   dataset1 -> demographic data
#   dataset2 -> screen time data
#   dataset3 -> well-being indicators
dataset1, dataset2, dataset3 = (
    pd.read_csv(csv_path)
    for csv_path in ('dataset1.csv', 'dataset2.csv', 'dataset3.csv')
)

# Combine all three tables on the shared 'ID' column. No `how` is passed, so pandas
# uses its default inner join: only IDs present in every table reach merged_data.
merged_data = dataset1.merge(dataset2, on='ID').merge(dataset3, on='ID')

# Apply Seaborn's "whitegrid" style to the figures drawn after this point
sns.set(style="whitegrid")


Defining Screen Time Columns

In [2]:
# Columns of merged_data treated as screen-time measurements by the analyses below.
# Each row of the literal pairs the '_we' and '_wk' column for one prefix.
# NOTE(review): '_we'/'_wk' presumably mean weekend/weekday and C/G/S/T the device or
# activity type — confirm against the dataset codebook.
screen_time_columns = [
    'C_we', 'C_wk',
    'G_we', 'G_wk',
    'S_we', 'S_wk',
    'T_we', 'T_wk',
]

<h3><b>Descriptive Statistical Analysis</b></h3>

<h4><b>Investigation 1: Descriptive Statistics for Digital Screen Time</b></h4>

<b>Propose:</b> Analyze and describe the general trends in digital screen time by gender and deprivation status.


<b>Justify:</b> Understanding screen time patterns based on demographic factors like gender and deprivation status can reveal key insights into user behavior.

<b>Key Findings:</b> TV has the highest average screen time on both weekdays and weekends.

<h4><b>Execution and Results</b></h4>

Execution based on gender

In [None]:
# Analysis based on gender ---
print("\nAnalyzing screen time based on demographic factor: gender...")

# One row per distinct 'gender' value, one column per screen-time variable,
# each cell holding that group's mean.
rows_by_gender = merged_data.groupby('gender')
gender_groups = rows_by_gender[screen_time_columns].mean()

# Show the heading and the table of group means, one per line
print("\nMean screen time by gender:", gender_groups, sep="\n")

<h3><b>Visualization</b></h3>

In [None]:
# Visualization 1: Bar chart of average screen time by gender
print("\nPlotting bar chart of average screen time by gender...")

# Transpose so every screen-time column is an x-axis category with one bar per gender
gender_ax = gender_groups.T.plot.bar(
    figsize=(10, 6),
    title="Average Screen Time by Gender",
)
gender_ax.set_ylabel("Average Screen Time (hours)")

# Tilt the column names so neighbouring tick labels do not overlap
plt.xticks(rotation=45)
gender_ax.figure.tight_layout()
plt.show()

Execution based on deprivation status

In [None]:
# Analysis based on deprivation status ---
print("\nAnalyzing screen time based on deprivation status...")

# One row per distinct 'deprived' value, one column per screen-time variable,
# each cell holding that group's mean.
rows_by_deprivation = merged_data.groupby('deprived')
deprivation_groups = rows_by_deprivation[screen_time_columns].mean()

# Show the heading and the table of group means, one per line
print("\nMean screen time by deprivation status:", deprivation_groups, sep="\n")

<h3><b>Visualization</b></h3>

In [None]:
# Visualization 2: Bar chart of average screen time by deprivation status
print("\nPlotting bar chart of average screen time by deprivation status...")

# Transpose so every screen-time column is an x-axis category with one bar per
# 'deprived' group
deprivation_ax = deprivation_groups.T.plot.bar(
    figsize=(10, 6),
    title="Average Screen Time by Deprivation Status",
)
deprivation_ax.set_ylabel("Average Screen Time (hours)")

# Tilt the column names so neighbouring tick labels do not overlap
plt.xticks(rotation=45)
deprivation_ax.figure.tight_layout()
plt.show()

Calculate and display descriptive statistics

In [None]:
# Per-column summary statistics (mean, median, standard deviation, IQR) for every
# screen-time variable, printed as they are computed and collected in
# screen_time_stats as {column: {'mean', 'median', 'std', 'iqr'}}.
#
# Every statistic is taken with a pandas Series method so missing values are treated
# the same way throughout (skipped). Mixing NumPy calls here is a trap: np.mean and
# np.std hand off to the Series methods and skip NaN, whereas np.median and
# np.percentile operate on the raw array and return NaN if a single value is missing.
screen_time_stats = {}
for col in screen_time_columns:
    values = merged_data[col]
    print(f"Calculating statistics for {col}:")

    # Mean
    mean_value = values.mean()
    print(f"  Mean: {mean_value:.2f} hours")

    # Median
    median_value = values.median()
    print(f"  Median: {median_value:.2f} hours")

    # Standard Deviation
    # ddof=0 (population form) is what np.std computes; pandas' own default is
    # ddof=1, so it is passed explicitly to keep the reported figure unchanged.
    std_value = values.std(ddof=0)
    print(f"  Standard Deviation: {std_value:.2f}")

    # Interquartile Range (IQR): 75th minus 25th percentile (linear interpolation,
    # the default for both Series.quantile and np.percentile)
    lower_quartile, upper_quartile = values.quantile([0.25, 0.75])
    iqr_value = upper_quartile - lower_quartile
    print(f"  Interquartile Range (IQR): {iqr_value:.2f}\n")

    # Store stats in dictionary
    screen_time_stats[col] = {
        'mean': mean_value,
        'median': median_value,
        'std': std_value,
        'iqr': iqr_value
    }
  

<h3><b>Visualization</b></h3>

In [None]:
# Visualization 3: Pairplot for Screen Time
print("Creating pairplot for screen time data...")

# sns.pairplot is a figure-level function: it creates and sizes its own figure.
# Calling plt.figure() beforehand would not size the pairplot; it would only leave
# an empty extra figure in the cell output, so no figure is created here.
screen_time_grid = sns.pairplot(
    merged_data[screen_time_columns],
    diag_kind="kde",
    corner=True,
)

# Put the title on the grid's own figure rather than on whichever figure happens to
# be "current"; y > 1 lifts it above the top row of panels.
screen_time_grid.fig.suptitle(
    "Pairplot of Screen Time Activities (Weekday and Weekend)",
    y=1.02,
    fontsize=16,
)
plt.show()