In [3]:
import requests

def download_dataset_from_google_drive(url, output_file):
    """Download a file shared through a Google Drive link and save it locally.

    The file id is read from the second-to-last ``/``-separated segment of
    ``url`` (the ``<id>`` in ``.../file/d/<id>/view?...``). A first request is
    sent to the download endpoint; when the response carries a cookie whose
    name starts with ``download_warning``, the request is repeated with that
    cookie's value passed as the ``confirm`` parameter.

    Args:
        url: Share link of the form
            ``https://drive.google.com/file/d/<id>/view?...``.
        output_file: Path the downloaded bytes are written to.

    Raises:
        requests.HTTPError: If the download endpoint answers with a 4xx/5xx
            status, so an error page is never saved as if it were the data.
    """
    file_id = url.split("/")[-2]
    base_url = "https://docs.google.com/uc?export=download"

    # The context manager releases the session's pooled connections even when
    # one of the requests or the file write fails.
    with requests.Session() as session:
        response = session.get(base_url, params={"id": file_id}, stream=True)
        response.raise_for_status()
        token = get_confirm_token(response)

        if token:
            params = {"id": file_id, "confirm": token}
            response = session.get(base_url, params=params, stream=True)
            response.raise_for_status()

        # Must run while the session is still open: the body is streamed.
        save_response_content(response, output_file)

    print(f"CSV file '{output_file}' created successfully.")

def get_confirm_token(response):
    """Return the value of the first ``download_warning*`` cookie, if any.

    Args:
        response: Object exposing a ``cookies`` mapping with ``items()``.

    Returns:
        The cookie value of the first cookie whose name starts with
        ``"download_warning"``, or ``None`` when no such cookie is present.
    """
    matching_values = (
        cookie_value
        for cookie_name, cookie_value in response.cookies.items()
        if cookie_name.startswith("download_warning")
    )
    # next() with a default stops at the first match, like the early return.
    return next(matching_values, None)

def save_response_content(response, output_file, chunk_size=32768):
    """Stream the body of ``response`` into ``output_file``.

    Args:
        response: Object exposing ``iter_content(chunk_size)`` that yields
            byte chunks (e.g. a streamed ``requests`` response).
        output_file: Path of the file to (over)write in binary mode.
        chunk_size: Size passed to ``iter_content`` for each read. Defaults
            to 32768, the value that was previously hard-coded.
    """
    with open(output_file, "wb") as file:
        for chunk in response.iter_content(chunk_size):
            # Empty chunks carry no data and are skipped.
            if chunk:
                file.write(chunk)

# Demo: download the shared dataset and write it to "output.csv" in the
# current working directory.
url = "https://drive.google.com/file/d/1mCjtYHiX--mMUjicuaP2gH3k-SnFxt8Y/view?usp=sharing"
output_file = "output.csv"
download_dataset_from_google_drive(url=url, output_file=output_file)


CSV file 'output.csv' created successfully.


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
# Load the downloaded CSV into a DataFrame and preview its first five rows.
# NOTE(review): the "Unnamed: 0" column in the preview looks like a row index
# that was saved with the file; consider index_col=0 — confirm before changing.
df = pd.read_csv(filepath_or_buffer="output.csv")
df.head(n=5)

Unnamed: 0,Patient ID,Blood Pressure Before (mmHg),Blood Pressure After (mmHg)
0,1,130,120
1,2,142,135
2,3,120,118
3,4,135,127
4,5,148,140


In [4]:
# Q1.a. Measure the dispersion in both measurements and interpret the results.

# Column headers may carry stray whitespace; normalise them before lookup.
df.columns = df.columns.str.strip()

# Dispersion is measured as the sample variance (pandas default, ddof=1).
disp_before, disp_after = (
    df[column].var()
    for column in ('Blood Pressure Before (mmHg)', 'Blood Pressure After (mmHg)')
)

print(
    f"Dispersion Before: {disp_before}",
    f"Dispersion After: {disp_after}",
    sep="\n",
)

Dispersion Before: 43.53727272727271
Dispersion After: 47.4448484848485


In [5]:
# Q2.b. Calculate the mean and the 95% confidence interval (i.e. 5%
# significance level) of both measurements and plot them.
mean_before = df["Blood Pressure Before (mmHg)"].mean()
mean_after = df["Blood Pressure After (mmHg)"].mean()

# Student-t interval around each mean: confidence 0.95, n - 1 degrees of
# freedom, scaled by the standard error of the mean.
confidence_interval_before = stats.t.interval(0.95, len(df)-1, loc=mean_before, scale=df["Blood Pressure Before (mmHg)"].sem())
confidence_interval_after = stats.t.interval(0.95, len(df)-1, loc=mean_after, scale=df["Blood Pressure After (mmHg)"].sem())

# The interval computed above is the 95% one, so label it as such.
print("\nMean and 95% Confidence Interval:")
print("Before: Mean =", mean_before, "Confidence Interval =", confidence_interval_before)
print("After: Mean =", mean_after, "Confidence Interval =", confidence_interval_after)

# Plot each mean with its confidence interval as an error bar. The t interval
# is symmetric around the mean, so the half-width is mean - lower bound.
fig, ax = plt.subplots()
ax.errorbar(
    ["Before", "After"],
    [mean_before, mean_after],
    yerr=[
        mean_before - confidence_interval_before[0],
        mean_after - confidence_interval_after[0],
    ],
    fmt="o",
    capsize=8,
)
ax.set_title("Mean blood pressure with 95% confidence interval")
ax.set_ylabel("Blood Pressure (mmHg)")
plt.show()



Mean and 5% Confidence Interval:
Before: Mean = 133.91 Confidence Interval = (132.60075849172767, 135.21924150827232)
After: Mean = 128.36 Confidence Interval = (126.99326697818071, 129.7267330218193)


In [6]:
# Q3.c. Calculate the mean absolute deviation and standard deviation and
# interpret the results.

# Series.mad() was deprecated in pandas 1.5 and removed in pandas 2.0, so the
# mean absolute deviation around the mean is computed explicitly.
# The results are stored in mad_* names so the means computed for the
# confidence-interval cell (mean_before / mean_after) are not overwritten.
mad_before = (
    df["Blood Pressure Before (mmHg)"] - df["Blood Pressure Before (mmHg)"].mean()
).abs().mean()
mad_after = (
    df["Blood Pressure After (mmHg)"] - df["Blood Pressure After (mmHg)"].mean()
).abs().mean()

# Sample standard deviation (pandas default, ddof=1).
sd_before = df["Blood Pressure Before (mmHg)"].std()
sd_after = df["Blood Pressure After (mmHg)"].std()

# "Mean" in the labels below abbreviates "Mean Absolute Deviation", as stated
# in the header line.
print("\nMean Absolute Deviation (Mean) and Standard Deviation (SD):")
print("Before: Mean =", mad_before, "SD =", sd_before)
print("After: Mean =", mad_after, "SD =", sd_after)


Mean Absolute Deviation (Mean) and Standard Deviation (SD):
Before: Mean = 5.7118 SD = 6.598278012275075
After: Mean = 5.9 SD = 6.8880221025232276


In [7]:
# Q4.d. Calculate the correlation coefficient and check its significance at
# the 1% level of significance.
significance_level = 0.01

# Pearson correlation between the paired before/after readings, together
# with the p-value reported by scipy for that coefficient.
correlation_coefficient, p_value = stats.pearsonr(
    df["Blood Pressure Before (mmHg)"],
    df["Blood Pressure After (mmHg)"],
)

print("\nCorrelation Coefficient and Significance Test:")
print("Correlation Coefficient =", correlation_coefficient)
print("P-value =", p_value)

# Significant only when the p-value is strictly below the chosen level.
print(
    "The correlation coefficient is significant at the 1% level."
    if p_value < significance_level
    else "The correlation coefficient is not significant at the 1% level."
)


Correlation Coefficient and Significance Test:
Correlation Coefficient = 0.9779485966556019
P-value = 1.8097381433068541e-68
The correlation coefficient is significant at the 1% level.
