<h3><b>Load and Merge Datasets</b></h3>

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from math import sqrt

# Read the three source tables from CSV files in the working directory.
dataset1 = pd.read_csv('dataset1.csv')  # Demographic data
dataset2 = pd.read_csv('dataset2.csv')  # Screen time data
dataset3 = pd.read_csv('dataset3.csv')  # Well-being indicators

# Join demographics -> screen time -> well-being on the shared 'ID' column.
# No `how=` is given, so pandas' default join type applies (inner, per the
# pandas docs): only IDs present in all three tables are kept.
merged_data = (
    dataset1
    .merge(dataset2, on='ID')
    .merge(dataset3, on='ID')
)

# Use Seaborn's "whitegrid" style for every plot drawn after this point
sns.set(style="whitegrid")


Defining Screen Time Columns

In [2]:
# Screen-time column names follow the pattern "<prefix>_<suffix>": one prefix
# letter (C, G, S, T) combined with "we" / "wk".
# NOTE(review): presumably the suffixes mean weekend / weekday and the letters
# denote device or activity types — confirm against the dataset codebook.
screen_time_columns = [
    f"{prefix}_{suffix}"
    for prefix in ("C", "G", "S", "T")
    for suffix in ("we", "wk")
]

<h3><b>Descriptive Statistical Analysis</b></h3>

<h4><b>Investigation 2: Descriptive Statistics for Well-being Indicators</b></h4>

<b>Propose:</b> Investigate the trends in well-being indicators based on self-reported responses.

<b>Justify:</b> Understanding well-being trends can provide insights into mental and emotional health in relation to screen time.

<b>Key Findings:</b> Well-being scores are positive for indicators such as 'Feeling Loved' and 'Cheerfulness'.

<h4><b>Execution and Results</b></h4>

Define well-being indicator columns


In [3]:
# Names of the well-being indicator columns analysed below, in the order in
# which their statistics are reported.
wellbeing_columns = [
    'Optm', 'Usef', 'Relx', 'Intp',
    'Engs', 'Dealpr', 'Thcklr', 'Goodme',
    'Clsep', 'Conf', 'Mkmind', 'Loved',
    'Intthg', 'Cheer',
]

In [None]:
# Descriptive statistics (mean, median, population standard deviation, IQR)
# for every well-being indicator. Results are printed and also collected in
# `wellbeing_stats`, keyed by column name.
#
# All four statistics are computed on the same NaN-free observations.
# Previously np.mean / np.std delegated to the pandas Series methods (which
# skip NaN) while np.median / np.percentile converted to an ndarray and
# returned NaN as soon as one response was missing, so the reported numbers
# could describe different subsets of the data.
wellbeing_stats = {}
for col in wellbeing_columns:
    print(f"Calculating statistics for {col}:")

    # Look the column up once and drop missing responses up front
    values = merged_data[col].dropna()

    # Mean
    mean_value = values.mean()
    print(f"  Mean: {mean_value:.2f}")

    # Median
    median_value = values.median()
    print(f"  Median: {median_value:.2f}")

    # Standard Deviation (ddof=0, i.e. population SD, matching what
    # np.std produced for this column before)
    std_value = values.std(ddof=0)
    print(f"  Standard Deviation: {std_value:.2f}")

    # Interquartile Range (IQR): Q3 - Q1, linear interpolation (the default
    # of both Series.quantile and np.percentile)
    q1, q3 = values.quantile([0.25, 0.75])
    iqr_value = q3 - q1
    print(f"  Interquartile Range (IQR): {iqr_value:.2f}\n")

    # Store stats in dictionary
    wellbeing_stats[col] = {
        'mean': mean_value,
        'median': median_value,
        'std': std_value,
        'iqr': iqr_value
    }

<h3><b>Visualization</b></h3>

In [None]:
# Visualization 1: Pairplot for Well-being Indicators
print("Creating pairplot for well-being indicators...")

# sns.pairplot always builds its own figure and returns the grid that owns it.
# A preceding plt.figure(figsize=...) therefore only produced an extra, empty
# figure in the output and never affected the pairplot's size (which is
# controlled by pairplot's own `height` / `aspect` arguments), so it is omitted.
pair_grid = sns.pairplot(merged_data[wellbeing_columns], diag_kind="kde", corner=True)

# Title the grid's figure explicitly instead of relying on "current figure" state;
# y > 1 places the title just above the top row of panels.
pair_grid.fig.suptitle("Pairplot of Well-being Indicators", y=1.02, fontsize=16)
plt.show()

<h4><b>Additional Analysis</b></h4>

In [None]:
# Additional Analysis: compare screen time across the groups defined by the
# 'deprived' column.
print("\nAnalyzing screen time patterns based on deprivation status...")

# One row per distinct 'deprived' value, one column per screen-time variable,
# each cell holding that group's mean.
deprivation_groups = (
    merged_data
    .groupby('deprived')[screen_time_columns]
    .mean()
)

# Show the heading and the table of group means on consecutive lines
print("\nMean screen time by deprivation status:", deprivation_groups, sep="\n")

<h4><b>Visualization</b></h4>

In [None]:
# Visualization: grouped bar chart of the mean screen time per deprivation group
print("\nPlotting bar chart of average screen time by deprivation status...")

# Transpose so that each screen-time column becomes an x-axis category and
# each 'deprived' group becomes one bar series within it.
screen_time_by_variable = deprivation_groups.T
ax = screen_time_by_variable.plot.bar(
    figsize=(10, 6),
    title="Average Screen Time by Deprivation Status",
    rot=45,  # tilt the x tick labels by 45 degrees
)
ax.set_ylabel("Average Screen Time (hours)")
plt.tight_layout()
plt.show()