In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the census CSV and preview its first ten rows
data_census = pd.read_csv(r"C:\Users\KAMINENI MONIKA\Downloads\acs2017_census_tract_data.csv")
print("Displaying the first 10 rows of Census Data:", data_census.iloc[:10], sep="\n")

# Read the pandemic CSV and preview its first ten rows
data_pandemic = pd.read_csv(r"C:\Users\KAMINENI MONIKA\Desktop\COVID_county_data.csv")
print("\nHere's the Pandemic Data:", data_pandemic.iloc[:10], sep="\n")

# Function to transform Census data
def transform_census(data):
    """Collapse census rows into one summary row per (County, State).

    Args:
        data: DataFrame with ``County``, ``State``, ``TotalPop``,
            ``Poverty`` and ``IncomePerCap`` columns.

    Returns:
        DataFrame with ``County``, ``State``, ``Population`` (sum of
        ``TotalPop``), ``PovertyRate`` (plain mean of ``Poverty``),
        ``IncomePerCapita`` (plain mean of ``IncomePerCap``) and a 1-based
        ``CountyID``.
    """
    wanted_columns = ['County', 'State', 'TotalPop', 'Poverty', 'IncomePerCap']
    aggregations = {
        'Population': ('TotalPop', 'sum'),
        'PovertyRate': ('Poverty', 'mean'),
        'IncomePerCapita': ('IncomePerCap', 'mean'),
    }

    grouped = data[wanted_columns].groupby(['County', 'State'])
    summary = grouped.agg(**aggregations).reset_index()

    # IDs follow the row order of the grouped result, starting at 1.
    summary['CountyID'] = summary.index + 1
    return summary

# Function to filter county data based on specified counties and states
def filter_county_data(data, counties, states):
    """Return the rows of ``data`` for the requested county/state pairs.

    ``counties`` and ``states`` are read as parallel lists: ``counties[i]``
    is looked up in ``states[i]``. Testing the two columns independently
    would also return same-named counties from any other listed state
    (asking for Washington County/Oregon and Loudoun County/Virginia would
    also return a Washington County/Virginia row).

    Args:
        data: DataFrame with ``County`` and ``State`` columns.
        counties: County names to keep.
        states: The state for each entry of ``counties``.

    Returns:
        The matching rows of ``data``, in their original order and with
        their original index.
    """
    counties = list(counties)
    states = list(states)

    if len(counties) != len(states):
        # The lists cannot be paired up, so fall back to testing each
        # column on its own, as this function did before pairing existed.
        independent = data['County'].isin(counties) & data['State'].isin(states)
        return data[independent]

    wanted_pairs = set(zip(counties, states))
    # Build the mask as a Series on data's own index so that an empty frame
    # still yields a boolean mask rather than an empty column selection.
    mask = pd.Series(
        [pair in wanted_pairs for pair in zip(data['County'], data['State'])],
        index=data.index,
        dtype=bool,
    )
    return data[mask]

# Function to transform Pandemic data
def transform_pandemic_data(data):
    """Sum cases and deaths per county, state, FIPS code and calendar month.

    The caller's DataFrame is left untouched: the parsed dates and the
    month key live only on a derived frame, so calling this function does
    not overwrite ``Date`` or add a ``MonthYear`` column to ``data``.

    Args:
        data: DataFrame with ``County``, ``State``, ``FIPS``, ``Date``,
            ``Cases`` and ``Deaths`` columns. ``Date`` may hold anything
            ``pd.to_datetime`` can parse.

    Returns:
        DataFrame with ``County``, ``State``, ``FIPS``, ``MonthYear``
        (monthly period), ``Cases`` and ``Deaths`` (sums within the group).
    """
    dates = pd.to_datetime(data['Date'])
    # assign() returns a new frame, so the input is not modified.
    keyed = data.assign(Date=dates, MonthYear=dates.dt.to_period('M'))

    pandemic_summary = keyed.groupby(['County', 'State', 'FIPS', 'MonthYear']).agg(
        Cases=('Cases', 'sum'),
        Deaths=('Deaths', 'sum'),
    ).reset_index()
    return pandemic_summary

# Function to filter Pandemic data based on county and months
def filter_pandemic_data(data, county, months):
    """Keep the rows for one county that fall in the given months.

    Args:
        data: DataFrame with ``County`` and ``MonthYear`` columns.
        county: Exact ``County`` value to keep.
        months: Collection of month labels; each row's ``MonthYear`` is
            converted to ``str`` before it is compared against them.

    Returns:
        The matching rows of ``data``.
    """
    is_county = data['County'] == county
    month_labels = data['MonthYear'].astype(str)
    in_months = month_labels.isin(months)
    return data.loc[is_county & in_months]

# Function to merge Pandemic and Census data
def merge_pandemic_census_data(pandemic, census):
    """Join per-county pandemic totals to the census summary and add rates.

    Totals are computed and joined per (County, State) whenever both frames
    carry a ``State`` column. County names repeat across states, so keying
    on ``County`` alone would add the case counts of every same-named county
    together and attach that combined total to each state's census row.
    If either frame has no ``State`` column the join falls back to
    ``County`` only.

    Args:
        pandemic: DataFrame with ``County``, ``Cases`` and ``Deaths``
            columns, and normally ``State``.
        census: DataFrame with ``County``, ``Population`` and
            ``PovertyRate`` columns, and normally ``State``.

    Returns:
        The inner join of the summed pandemic totals (``TotalCases``,
        ``TotalDeaths``) with ``census``, plus ``CasesPer100K``,
        ``DeathsPer100K`` and ``Poverty %`` (a copy of ``PovertyRate``).
    """
    has_state = 'State' in pandemic.columns and 'State' in census.columns
    keys = ['County', 'State'] if has_state else ['County']

    totals = pandemic.groupby(keys).agg(
        TotalCases=('Cases', 'sum'),
        TotalDeaths=('Deaths', 'sum'),
    ).reset_index()

    merged_data = totals.merge(census, on=keys)

    # Rates are expressed per 100,000 residents.
    population_in_100k = merged_data['Population'] / 100000
    merged_data['CasesPer100K'] = merged_data['TotalCases'] / population_in_100k
    merged_data['DeathsPer100K'] = merged_data['TotalDeaths'] / population_in_100k
    merged_data['Poverty %'] = merged_data['PovertyRate']

    return merged_data

# Function to transform COVID data to monthly level
def transform_covid_to_monthly(data):
    """Sum cases and deaths per county and month.

    When ``data`` has a ``State`` column it is part of the grouping key,
    so same-named counties in different states stay separate rows instead
    of being added together. Without a ``State`` column the grouping is by
    ``County`` and ``MonthYear`` only.

    Args:
        data: DataFrame with ``County``, ``MonthYear``, ``Cases`` and
            ``Deaths`` columns, and optionally ``State``.

    Returns:
        DataFrame with the grouping columns plus ``TotalCases`` and
        ``TotalDeaths``.
    """
    if 'State' in data.columns:
        keys = ['County', 'State', 'MonthYear']
    else:
        keys = ['County', 'MonthYear']

    covid_monthly = data.groupby(keys).agg(
        TotalCases=('Cases', 'sum'),
        TotalDeaths=('Deaths', 'sum'),
    ).reset_index()
    return covid_monthly

# Function to compute correlations for a specific state
def compute_correlation(data, state):
    """Correlate COVID rates with poverty and income for one state's rows.

    Args:
        data: DataFrame with ``State``, ``CasesPer100K``, ``DeathsPer100K``,
            ``Poverty %`` and ``IncomePerCapita`` columns.
        state: ``State`` value whose rows are used.

    Returns:
        A 4-tuple of ``Series.corr`` results, in this order: cases vs
        poverty, deaths vs poverty, cases vs income, deaths vs income.
    """
    subset = data.loc[data['State'] == state]

    # (left column, right column) in the order the results are returned.
    column_pairs = (
        ('CasesPer100K', 'Poverty %'),
        ('DeathsPer100K', 'Poverty %'),
        ('CasesPer100K', 'IncomePerCapita'),
        ('DeathsPer100K', 'IncomePerCapita'),
    )
    return tuple(subset[left].corr(subset[right]) for left, right in column_pairs)

# Data loading
# These reads rebind data_census / data_pandemic (already loaded above from
# absolute paths) using paths relative to the current working directory.
data_census = pd.read_csv('acs2017_census_tract_data.csv')
data_pandemic = pd.read_csv('COVID_county_data.csv')

# Transforming data
county_summary = transform_census(data_census)
pandemic_summary = transform_pandemic_data(data_pandemic)
covid_monthly_summary = transform_covid_to_monthly(pandemic_summary)
covid_summary = merge_pandemic_census_data(pandemic_summary, county_summary)

# Filtering data
required_counties = ['Loudoun County', 'Washington County', 'Harlan County', 'Malheur County']
required_states = ['Virginia', 'Oregon', 'Kentucky', 'Oregon']
table_data = filter_county_data(covid_summary, required_counties, required_states)
# covid_summary is the pandemic data joined to the census on County and is
# filtered above with names such as 'Malheur County', so the pandemic frame
# has to use that suffixed spelling as well; a bare 'Malheur' matches no row.
# NOTE(review): confirm the spelling against COVID_county_data.csv.
malheur_data_filtered = filter_pandemic_data(pandemic_summary, 'Malheur County', ['2020-08', '2021-01', '2021-02'])

# Computing correlations
oregon_cases_poverty, oregon_deaths_poverty, oregon_cases_income, oregon_deaths_income = compute_correlation(covid_summary, 'Oregon')
# compute_correlation keeps only the rows whose State equals its second
# argument. The State values used elsewhere in this script are state names
# ('Oregon', 'Virginia', ...), so filtering on 'USA' would select nothing and
# every national figure would be NaN. Relabel all rows on a throwaway copy so
# the national correlation covers every county.
usa_cases_poverty, usa_deaths_poverty, usa_cases_income, usa_deaths_income = compute_correlation(
    covid_summary.assign(State='USA'), 'USA'
)

# Additional correlations
# covid_summary also holds text columns (County, State). DataFrame.corr()
# raises on those in pandas 2.x, so correlate the numeric columns only.
numeric_summary = covid_summary.select_dtypes(include='number')
oregon_counties_corr = numeric_summary[covid_summary['State'] == 'Oregon'].corr()
usa_counties_corr = numeric_summary.corr()

# Printing results
print("\nSummary of Counties based on Census Data:")
print(county_summary.head())

print("\nCOVID Monthly Summary Data:")
print(covid_monthly_summary.head())

print("\nMerged Summary of Pandemic and Census Data:")
print(covid_summary.head())

print("\nTable data filtered for specified counties and states:")
print(table_data)

print("\nFiltered data for Malheur county during specific months:")
print(malheur_data_filtered[['County', 'State', 'MonthYear', 'Cases', 'Deaths']])

print("\nCorrelation for Oregon counties:")
print(f"Cases vs. Poverty: {oregon_cases_poverty}")
print(f"Deaths vs. Poverty: {oregon_deaths_poverty}")
print(f"Cases vs. Income: {oregon_cases_income}")
print(f"Deaths vs. Income: {oregon_deaths_income}")

print("\nCorrelation for all USA counties:")
print(f"Cases vs. Poverty: {usa_cases_poverty}")
print(f"Deaths vs. Poverty: {usa_deaths_poverty}")
print(f"Cases vs. Income: {usa_cases_income}")
print(f"Deaths vs. Income: {usa_deaths_income}")

print("\nAdditional Correlations for Oregon Counties:")
print(oregon_counties_corr[['CasesPer100K', 'Poverty %', 'IncomePerCapita', 'DeathsPer100K']])

print("\nAdditional Correlations for all USA Counties:")
print(usa_counties_corr[['CasesPer100K', 'Poverty %', 'IncomePerCapita', 'DeathsPer100K']])

# Plot significant correlations
significant_correlations = {
    "Oregon counties (cases vs. poverty)": oregon_cases_poverty,
    "USA counties (cases vs. poverty)": usa_cases_poverty
}

for title, r_value in significant_correlations.items():
    # Only correlations stronger than 0.5 in magnitude are plotted; a NaN
    # correlation fails this comparison and is skipped.
    if abs(r_value) > 0.5:
        if "Oregon" in title:
            data = covid_summary[covid_summary['State'] == 'Oregon']
        else:
            data = covid_summary
        x_col = 'CasesPer100K'
        y_col = 'Poverty %'
        plt.figure()
        sns.scatterplot(data=data, x=x_col, y=y_col)
        plt.title(f"Scatter plot of {x_col} vs {y_col} ({title})")
        plt.show()


PermissionError: [Errno 13] Permission denied: 'C:\\Users\\KAMINENI MONIKA\\Downloads\\acs2017_census_tract_data.csv'