<h3><b>Load and Merge Datasets</b></h3>

In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from math import sqrt

# Apply the Seaborn "whitegrid" look before any figure is created
sns.set(style="whitegrid")

# Read the three source tables from the working directory
dataset1 = pd.read_csv('dataset1.csv')  # Demographic data
dataset2 = pd.read_csv('dataset2.csv')  # Screen time data
dataset3 = pd.read_csv('dataset3.csv')  # Well-being indicators

# Join demographics -> screen time -> well-being on the shared 'ID' column.
# No `how` is given, so pandas performs its default inner join: only IDs
# present in all three tables survive.
merged_data = (
    dataset1
    .merge(dataset2, on='ID')
    .merge(dataset3, on='ID')
)


Defining Screen Time Columns

In [8]:
# Screen time column names: one '<prefix>_we' / '<prefix>_wk' pair per prefix.
# '_we' columns hold weekend values (T_we is used as weekend screen time below);
# '_wk' is presumably the weekday counterpart -- TODO confirm against the data dictionary.
screen_time_columns = [
    f"{prefix}_{suffix}"
    for prefix in ("C", "G", "S", "T")
    for suffix in ("we", "wk")
]

<h3><b>Inferential Statistical Analysis</b></h3>

<h4><b>Investigation 1: Inferential Analysis for Screen Time (T_we) vs Optimism (Optm)</b></h4>

<b>Propose:</b> Investigate the relationship between screen time on weekends and optimism levels.

<b>Justify:</b> This analysis can determine if higher screen time correlates with lower optimism levels, providing insight into the impact of screen time on mental well-being.

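<b>Key Findings:</b> Significant difference between mean weekend screen time and mean optimism score. Note that the test below compares the means of the two variables; it does not split respondents into high and low weekend screen time groups.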

<h4><b>Hypothesis Test</b></h4>

<b>H0:</b> Mean weekend screen time (T_we) is equal to the mean optimism score (Optm).

<b>H1:</b> Mean weekend screen time (T_we) differs from the mean optimism score (Optm).

<b>Step 1:</b> T-Test and P-Value

In [9]:
# Two-sample t-test on the means of weekend screen time and optimism.
# NOTE(review): both samples are columns of the same merged_data rows, so the
# independence assumption of ttest_ind is questionable -- confirm this is the
# intended test before relying on the p-value.
weekend_screen_time = merged_data['T_we']
optimism_scores = merged_data['Optm']
t_stat_optm, p_value_optm = stats.ttest_ind(weekend_screen_time, optimism_scores)

<b>Step 2:</b> Z-Score Calculation

In [10]:
# Sample statistics for both variables (ddof=1 -> sample standard deviation).
mean_T_we = np.mean(merged_data['T_we'])
std_T_we = np.std(merged_data['T_we'], ddof=1)
mean_Optm = np.mean(merged_data['Optm'])
std_Optm = np.std(merged_data['Optm'], ddof=1)
n = len(merged_data)  # both columns come from merged_data, so they share one n

# Standard error of the difference between two sample means. Both samples
# contribute variability, so both variances enter the denominator; using only
# the T_we standard error would overstate the statistic.
se_mean_difference = sqrt(std_T_we ** 2 / n + std_Optm ** 2 / n)

z_score_T_we_optm = (mean_T_we - mean_Optm) / se_mean_difference

<b>Step 3:</b> Confidence Interval for T_we

In [11]:
# Two-sided t-based confidence interval for the mean of T_we,
# centred on the sample mean and scaled by its standard error.
confidence_level = 0.95
degrees_freedom = n - 1
standard_error_T_we = std_T_we / sqrt(n)
confidence_interval_T_we = stats.t.interval(
    confidence_level,
    df=degrees_freedom,
    loc=mean_T_we,
    scale=standard_error_T_we,
)

<h3><b>Execution and Results</b></h3>

In [None]:
# Report the t-test, z-score and confidence interval computed in the earlier cells.
report_lines = [
    "\nT-Test Results for Screen Time (Weekend) vs Optimism:",
    f"T-Statistic: {t_stat_optm}, P-Value: {p_value_optm}",
    f"Z-Score: {z_score_T_we_optm}",
    f"95% Confidence Interval for Screen Time (Weekend): {confidence_interval_T_we}",
]
print("\n".join(report_lines))

<h4><b>Interpretation of Hypothesis Test</b></h4>

In [None]:
# Decide at the 5% significance level: a p-value below 0.05 rejects H0.
is_significant = p_value_optm < 0.05
verdict = (
    "Reject the null hypothesis (H0): There is a significant difference between weekend screen time and optimism."
    if is_significant
    else "Fail to reject the null hypothesis (H0): There is no significant difference between weekend screen time and optimism."
)
print(verdict)

<h3><b>Visualization</b></h3>

In [None]:
# Scatter plot of optimism against weekend screen time, with a fitted
# regression line (blue points, red line).
fig, ax = plt.subplots(figsize=(8, 5))
sns.regplot(
    x=merged_data['T_we'],
    y=merged_data['Optm'],
    scatter_kws={"color": "blue"},
    line_kws={"color": "red"},
    ax=ax,
)
ax.set_title("Screen Time (Weekend) vs Optimism")
ax.set_xlabel("Screen Time (Weekend) in Hours")
ax.set_ylabel("Optimism Score")
ax.grid(True)
fig.tight_layout()
plt.show()