## 1. Basic Statistical Concepts:

In this section, we'll introduce some basic statistical concepts using Python and Jupyter Notebook.

In [None]:
!pip install scipy

In [None]:
# Module 4: Data Analysis and Statistics

# 1. Basic Statistical Concepts

# Statistical concepts are fundamental in data analysis. Let's start with some basics.

# Import necessary libraries

import numpy as np
import scipy.stats as stats

# Create a seeded random generator so the numbers below are identical on every
# run (Restart Kernel -> Run All reproduces the same output), and so re-running
# just this cell regenerates exactly the same dataset.
rng = np.random.default_rng(42)

# Generate a sample dataset: 100 draws from a normal distribution
# with mean 50 and standard deviation 10.
data = rng.normal(loc=50, scale=10, size=100)

# Calculate the mean (average)
mean = np.mean(data)
print("Mean:", mean)

# Calculate the median (middle value)
median = np.median(data)
print("Median:", median)

# Calculate the standard deviation.
# `data` is a sample, so use the sample formula (ddof=1, i.e. divide by n - 1).
# NumPy's default is ddof=0 (divide by n), which is the population formula.
std_dev = np.std(data, ddof=1)
print("Standard Deviation:", std_dev)

# These are fundamental statistics that help us understand the central tendency and variability of data.


## 2. Descriptive Statistics:

In this section, we'll explore descriptive statistics, such as measures of central tendency and spread.

In [None]:
# 2. Descriptive Statistics

# Descriptive statistics summarize and describe the main features of a dataset.

# Seeded generator: makes this cell reproducible and idempotent (re-running it
# always yields the same dataset and therefore the same statistics).
rng = np.random.default_rng(43)

# Generate another sample dataset: 100 draws from a normal distribution
# with mean 40 and standard deviation 5.
data2 = rng.normal(loc=40, scale=5, size=100)

# Calculate the variance.
# ddof=1 gives the sample variance (divide by n - 1). This matches the
# `variance` field reported by stats.describe() below, whose default is also
# ddof=1; NumPy's own default (ddof=0) would print a different number.
variance = np.var(data2, ddof=1)
print("Variance:", variance)

# Calculate the range (maximum minus minimum; "ptp" = peak to peak)
data_range = np.ptp(data2)
print("Range:", data_range)

# Calculate the interquartile range (IQR): 75th percentile minus 25th percentile.
# Both percentiles are computed in a single call.
q1, q3 = np.percentile(data2, [25, 75])
iqr = q3 - q1
print("Interquartile Range (IQR):", iqr)

# Use the describe() function for a summary of descriptive statistics
# (number of observations, min/max, mean, variance, skewness, kurtosis).
stats_summary = stats.describe(data2)
print("\nSummary Statistics:")
print(stats_summary)

# Descriptive statistics provide insights into the distribution and spread of data.


## 3. Hypothesis Testing and Statistical Inference:

In this section, we'll perform a hypothesis test using Python to make statistical inferences.

In [None]:
# 3. Hypothesis Testing and Statistical Inference

# Hypothesis testing helps us make inferences about a population based on a sample.

# Seeded generator: without a fixed seed the samples change on every run, and
# the reject / fail-to-reject conclusion printed below can flip between runs.
rng = np.random.default_rng(44)

# Generate two samples for hypothesis testing.
# Both have 30 observations and the same spread (scale=10); only the
# underlying means differ (50 vs. 55).
sample1 = rng.normal(loc=50, scale=10, size=30)
sample2 = rng.normal(loc=55, scale=10, size=30)

# Perform a two-sample t-test to compare means.
# Null hypothesis: the two populations have the same mean.
# ttest_ind assumes equal population variances by default, which holds here
# because both samples are generated with scale=10.
t_stat, p_value = stats.ttest_ind(sample1, sample2)
print("T-statistic:", t_stat)
print("P-value:", p_value)

# Interpret the results
alpha = 0.05  # Significance level
if p_value < alpha:
    print("Reject the null hypothesis. There is a significant difference between the two samples.")
else:
    print("Fail to reject the null hypothesis. There is no significant difference between the two samples.")

# Hypothesis testing allows us to draw conclusions about populations from sample data.
