In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.ticker import StrMethodFormatter
from statsmodels.tsa.arima.model import ARIMA

# Graphing and Visualizations

In [None]:
# Read the intermediate 2000-2020 STD case file (path is relative to this notebook).
processed_cases_csv = "../Data/Intermediate/CDCSTDoverall2000_2020_processed.csv"
data = pd.read_csv(processed_cases_csv)

In [None]:
# DataFrame.drop returns a NEW frame, so the result has to be assigned back;
# otherwise "Unnamed: 0" (presumably an index column written out with the CSV)
# silently stays in `data`.
# errors="ignore" keeps the cell re-runnable after the column is already gone.
data = data.drop(columns=["Unnamed: 0"], errors="ignore")

# Preview goes last: only a cell's final expression is rendered.
data.head()

In [None]:
# Case-count columns to total for each year.
case_columns = [
    'cases_gonorrhea',
    'cases_chlamydia',
    'cases_early_non-primary_non-secondary_syphilis',
    'cases_primary_and_secondary_syphilis',
]

# Select the columns with a list. Indexing a GroupBy with a bare tuple of names
# (gb['a', 'b']) was deprecated in pandas 1.0 and raises in pandas 2.0.
df_sum = data.groupby(['year'])[case_columns].sum().reset_index()

In [None]:
# Keep only the rows for 2002 onward (row labels are deliberately left as-is).
df_sum = df_sum[df_sum.year >= 2002]

# Preview the filtered frame; it must be the last expression to be rendered.
df_sum.head()

In [None]:
# Two stacked line charts of yearly case totals:
#   top    - chlamydia and gonorrhea
#   bottom - the two syphilis categories
# The figure is saved for the poster and then shown inline.
sns.set_theme(style="ticks", palette=None)

# Create ONE figure that owns both panels. Calling plt.subplots() and then
# plt.figure() opens two figures and leaves the first one empty in the output.
fig, (ax_top, ax_bottom) = plt.subplots(nrows=2, ncols=1, figsize=(10, 8))

# Plot against the real years instead of the frame's row labels, so the x-axis
# stays correct however df_sum was filtered or re-indexed upstream.
years = df_sum['year'].astype(int)

ax_top.plot(years, df_sum['cases_chlamydia'], color='k', label='Chlamydia', marker=".")
ax_top.plot(years, df_sum['cases_gonorrhea'], color='r', label='Gonorrhea', marker=".")
ax_top.set_title("U.S. STI Cases, 2002-2020")

ax_bottom.plot(
    years,
    df_sum['cases_early_non-primary_non-secondary_syphilis'],
    color='b',
    label='Early, Non-Primary, Non-Secondary Syphilis',
    marker=".",
)
ax_bottom.plot(
    years,
    df_sum['cases_primary_and_secondary_syphilis'],
    color='orange',
    label='Primary and Secondary Syphilis',
    marker=".",
)
ax_bottom.set_xlabel('Year')

# Formatting shared by both panels.
for panel in (ax_top, ax_bottom):
    panel.set_ylabel('Case Counts')
    panel.set_xticks(years)
    panel.set_xticklabels(years)
    # A formatter keeps thousands separators in sync with the tick locations;
    # overwriting tick labels with set_yticklabels() freezes them and triggers
    # matplotlib's FixedFormatter warning. One formatter instance per axis.
    panel.yaxis.set_major_formatter(StrMethodFormatter('{x:,.0f}'))
    panel.legend()

fig.savefig('../Output/PosterPresentation/cases_gonorrhea_chlamydia_syphilis', dpi=300)
plt.show()

# Predictions

## National-Level Chlamydia Prediction

In [None]:
# Load the final merged file and actually remove the "Unnamed: 0" column.
# drop() returns a new frame, so its result must be kept rather than discarded;
# errors="ignore" tolerates a file that no longer carries that column.
data = (
    pd.read_csv("../Data/Final/STI0220_cov20_pop0220.csv")
    .drop(columns=["Unnamed: 0"], errors="ignore")
)
data.head()

In [None]:
# Columns to total per year: the four case counts plus total population.
summed_columns = [
    'cases_gonorrhea',
    'cases_chlamydia',
    'cases_early_non-primary_non-secondary_syphilis',
    'cases_primary_and_secondary_syphilis',
    "total_pop",
]

# A list (not a bare tuple) is required to pick several columns from a GroupBy;
# the tuple form was deprecated in pandas 1.0 and raises in pandas 2.0.
df_sum = data.groupby(['year'])[summed_columns].sum().reset_index()
df_sum.head()

In [None]:
# Build the frames used for modelling and plotting national chlamydia counts.
# assign() returns a new frame, so the integer cast of "year" is not written
# into a slice of df_sum (which raises SettingWithCopyWarning).
df_model = df_sum[["year", "cases_chlamydia"]].assign(
    year=lambda frame: frame["year"].astype(int)
)
# actual_value = df_model.at[20, "cases_chlamydia"]

# df_plot keeps every year; df_model is then narrowed to years up to 2019,
# which is the data the model is fitted on.
df_plot = df_model
df_model = df_model[df_model["year"] <= 2019]
df_model.head(21)

In [None]:
# Fit an ARIMA model to the national chlamydia series (years up to 2019).
# order=(p, d, q) = (2, 2, 0): two autoregressive lags, second-order
# differencing, no moving-average terms.
# ARIMA is imported once in the notebook's import cell rather than mid-notebook.
model = ARIMA(df_model["cases_chlamydia"], order=(2, 2, 0))
model_fit = model.fit()

In [None]:
 # summary of fit model
# Build the summary table first, then emit it as plain text.
fit_summary = model_fit.summary()
print(fit_summary)

In [None]:
# One-step-ahead forecast: the first period after the last training observation.
# forecast(steps=1) does not depend on how many rows were used for fitting,
# whereas predict(20) only means "next year" when there are exactly 20 of them.
pred = model_fit.forecast(steps=1).to_frame()

# Single positional lookup (row 0, column 0). The chained form iloc[0][0]
# treats an integer key on a label-indexed Series as a position, which is
# deprecated in recent pandas.
fitted_value = pred.iloc[0, 0]

In [None]:
# print("Predicted: " + str(fitted_value) + " Actual: " + str(actual_value))

In [None]:
# Line chart of observed national chlamydia cases, with the ARIMA prediction
# for 2020 overlaid as a red point. Saved for the poster and shown inline.
sns.set_theme(style="ticks", palette=None)
fig, ax = plt.subplots(figsize=(15, 8))

# Label the artists themselves so the legend cannot drift from what is drawn.
# The previous hand-written legend said "Gonorrhea" on a chlamydia plot.
sns.lineplot(
    data=df_plot,
    x='year',
    y='cases_chlamydia',
    marker="o",
    label="Actual Chlamydia Cases",
    ax=ax,
)
# zorder=3 draws the predicted point above the line and its markers.
ax.scatter(
    2020,
    fitted_value,
    marker='o',
    s=25,
    c="red",
    zorder=3,
    label="Predicted Chlamydia Cases, 2020",
)

ax.set_ylabel('Case Counts')
ax.set_xlabel('Year')
ax.set_xticks(df_plot['year'])
ax.set_title("U.S. Chlamydia Cases, 2002-2020 and Predicted Chlamydia Cases (Red), 2020")
# A formatter keeps thousands separators tied to the tick locations;
# set_yticklabels() would freeze the labels at their current positions.
ax.yaxis.set_major_formatter(StrMethodFormatter('{x:,.0f}'))
ax.legend()

fig.savefig('../Output/PosterPresentation/ModelOutputs/arima_chlamydia.png', dpi=300)
plt.show()

# Counterfactual Visualization

In [None]:
# Reload the final merged file for the county-level section.
# drop() returns a new frame, so chain it onto the read to make the removal of
# "Unnamed: 0" take effect; errors="ignore" tolerates the column being absent.
data = (
    pd.read_csv("../Data/Final/STI0220_cov20_pop0220.csv")
    .drop(columns=["Unnamed: 0"], errors="ignore")
)
data.head()

In [None]:
# Columns to total for each (year, fips) pair.
summed_columns = [
    'cases_gonorrhea',
    'cases_chlamydia',
    'cases_early_non-primary_non-secondary_syphilis',
    'cases_primary_and_secondary_syphilis',
    "total_pop",
]

# Use a list to select several columns from a GroupBy; the bare-tuple form
# was deprecated in pandas 1.0 and raises in pandas 2.0.
df_sum = data.groupby(['year', "fips"])[summed_columns].sum().reset_index()
df_sum.head()

In [None]:
# Chlamydia counts per (year, fips). assign() builds a new frame, so the integer
# cast of "year" is not written into a slice of df_sum (SettingWithCopyWarning).
df_model = df_sum[["year", "fips", "cases_chlamydia"]].assign(
    year=lambda frame: frame["year"].astype(int)
)

# Rows for fips 1001 only: df_plot1 keeps every year for that county,
# df_model1 is limited to years up to 2019 for model fitting.
df_plot1 = df_model[df_model.fips == 1001]
df_model1 = df_plot1[df_plot1["year"] <= 2019]
df_model1.head(21)

In [None]:
# Fit the same ARIMA(2, 2, 0) specification to the single-county series and
# forecast the period right after the last training year.
# ARIMA is imported once in the notebook's import cell rather than per cell.

# df_model1 is a row subset of a larger frame, so its row labels are not
# guaranteed to run 0..n-1; give the model a clean positional index.
county_series = df_model1["cases_chlamydia"].reset_index(drop=True)
model = ARIMA(county_series, order=(2, 2, 0))
model_fit = model.fit()

# forecast(steps=1) is the first out-of-sample step whatever the number of
# training rows; predict(20) assumed exactly 20 of them.
pred = model_fit.forecast(steps=1).to_frame()
# Single positional lookup instead of the chained iloc[0][0].
fitted_value = pred.iloc[0, 0]

In [None]:
# sns.set_theme(style="ticks", palette=None)
# plt.figure(figsize = (15,8))
# g = sns.lineplot(data = df_plot1, x = 'year', y = 'cases_chlamydia', marker = "o")
# g.set_ylabel('Case Counts')
# g.set_xlabel('Year')
# g.set_xticks(df_sum['year'])
# g.set_title("U.S. Chlamydia Cases, 2002-2020 and Predicted Chlamydia Cases (Red), 2020")
# plt.legend(labels=["Actual Gonorrhea Cases"])
# current_values = plt.gca().get_yticks()
# plt.gca().set_yticklabels(['{:,.0f}'.format(x) for x in current_values])
# plt.scatter(2020, fitted_value, marker='o', s=25, c="red")
# plt.savefig('../Output/PosterPresentation/ModelOutputs/CounterfactualVisualization/arima_chlamydia.png',dpi=300)
# plt.show()