# Exploratory Data Analysis and Hypothesis Testing

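This notebook explores how hours spent gaming relate to sleep duration and to the number of cigarettes smoked while gaming. It first visualises the data, then tests two hypotheses with Pearson correlation and simple linear regression.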

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import pearsonr
import statsmodels.api as sm

In [None]:
# Load the spreadsheet into the working DataFrame used by every later cell.
DATA_FILE = 'data(son).xlsx'  # path is relative to the notebook's working directory
df = pd.read_excel(DATA_FILE)

# Preview the first rows as a quick sanity check of the loaded columns.
df.head()

## Correlation Heatmap

In [None]:
# Pairwise correlations between the numeric columns only, drawn as an
# annotated heatmap on an explicitly created axes.
corr_matrix = df.corr(numeric_only=True)

fig, ax = plt.subplots()
sns.heatmap(corr_matrix, annot=True, fmt='.2f', cmap='coolwarm', ax=ax)
ax.set_title('Correlation Heatmap')
plt.show()

## Histogram of Cigarettes Smoked

In [None]:
# Distribution of the 'cigarettes smoked' column, with a kernel density
# estimate overlaid on the histogram bars.
cigarettes = df['cigarettes smoked']

fig, ax = plt.subplots()
sns.histplot(cigarettes, kde=True, ax=ax)
ax.set_title('Histogram of Cigarettes Smoked While Gaming')
plt.show()

## Game Hours vs Cigarettes Smoked

In [None]:
# Scatter plot of 'Game Hours' against 'cigarettes smoked' with a fitted
# regression line, drawn on an explicitly created axes.
fig, ax = plt.subplots()
sns.regplot(data=df, x='Game Hours', y='cigarettes smoked', ax=ax)
ax.set_title('Game Hours vs Cigarettes Smoked')
plt.show()

## Game Hours vs Sleep Hours

In [None]:
# Scatter plot of 'Game Hours' against 'Sleep Hours' with a fitted
# regression line, drawn on an explicitly created axes.
fig, ax = plt.subplots()
sns.regplot(data=df, x='Game Hours', y='Sleep Hours', ax=ax)
ax.set_title('Game Hours vs Sleep Hours')
plt.show()

## Hypothesis 1: Does playing games reduce sleep duration?

In [None]:
# Hypothesis 1: linear association between 'Game Hours' and 'Sleep Hours'.
#
# Both statistics are computed on the same complete-case rows: a blank
# spreadsheet cell in either column would otherwise turn the correlation
# into NaN or make the regression fit fail (OLS does no missing-value
# handling by default). With no missing values the results are unchanged.
sleep_obs = df[['Game Hours', 'Sleep Hours']].dropna()

# NOTE: pearsonr reports a two-sided p-value by default, while the hypothesis
# is directional; read the direction of the effect from the sign of r.
corr, p = pearsonr(sleep_obs['Game Hours'], sleep_obs['Sleep Hours'])
print(f'Pearson r: {corr:.2f}, p-value: {p:.4g}')

# Simple linear regression: Sleep Hours ~ const + Game Hours.
X = sm.add_constant(sleep_obs['Game Hours'])
model = sm.OLS(sleep_obs['Sleep Hours'], X).fit()
print(model.summary())

## Hypothesis 2: Does playing more games increase cigarettes smoked while gaming?

In [None]:
# Hypothesis 2: linear association between 'Game Hours' and 'cigarettes smoked'.
#
# Both statistics are computed on the same complete-case rows: a blank
# spreadsheet cell in either column would otherwise turn the correlation
# into NaN or make the regression fit fail (OLS does no missing-value
# handling by default). With no missing values the results are unchanged.
smoking_obs = df[['Game Hours', 'cigarettes smoked']].dropna()

# NOTE: pearsonr reports a two-sided p-value by default, while the hypothesis
# is directional; read the direction of the effect from the sign of r.
corr, p = pearsonr(smoking_obs['Game Hours'], smoking_obs['cigarettes smoked'])
print(f'Pearson r: {corr:.2f}, p-value: {p:.4g}')

# Simple linear regression: cigarettes smoked ~ const + Game Hours.
X = sm.add_constant(smoking_obs['Game Hours'])
model = sm.OLS(smoking_obs['cigarettes smoked'], X).fit()
print(model.summary())