# Dataset Playground

## Imports

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.axes import Axes
import numpy.typing as npt
from typing import List, Union
import helper_func as hf

## Load the data

In [None]:
# Read the raw CSV into a DataFrame and preview its first rows.
health_df = pd.read_csv(filepath_or_buffer="./data/data.csv")
health_df.head(n=5)


## Sorting data

In [None]:
# Sum every numeric column per indicator name, then order the indicators by
# their total in the "1960" column.
totals_by_indicator = health_df.groupby(["Indicator Name"]).sum(numeric_only=True)
indicators_df = totals_by_indicator.sort_values(by="1960")

# Transpose so that each original column header becomes a row and each
# indicator becomes a column, then skip the first 15 rows.
# NOTE(review): presumably the skipped rows are the earliest years - confirm.
hiv_by_year_df: pd.DataFrame = indicators_df.transpose().iloc[15:]

hiv_by_year_df.head(n=10)


### Filtered list

In [None]:
# Full indicator names as they appear in the data, defined once so the column
# selection and the rename mapping cannot drift apart.
knowledge_indicator = "% of males ages 15-49 having comprehensive correct knowledge about HIV (2 prevent ways and reject 3 misconceptions)"
literacy_indicator = "Literacy rate, adult male (% of males ages 15 and above)"

# Select the four indicators of interest, give the two long ones short labels
# and drop the "Unnamed: 60" row.
# The steps are chained without `inplace=True`: mutating the result of a
# column selection in place is what triggers pandas' SettingWithCopyWarning,
# whereas rename()/drop() each return an independent frame.
# NOTE(review): the short label says "19-49" although the indicator covers ages
# 15-49; it is kept unchanged because later cells look the column up by this
# exact name.
filtered_hiv_by_year_df = (
    hiv_by_year_df[
        [
            knowledge_indicator,
            literacy_indicator,
            "Urban population",
            "Rural population",
        ]
    ]
    .rename(
        columns={
            knowledge_indicator: "Males 19-49 knowledge",
            literacy_indicator: "Adult male literacy rate",
        }
    )
    .drop("Unnamed: 60")
)
filtered_hiv_by_year_df

#### Scale population to units of 10^7

In [None]:

# Relabel the two population columns to show their new unit, then divide them
# by 10^7.
# rename() is rebound rather than applied in place so the result is an
# independent frame, and the division is done on the whole column at once
# instead of element by element through apply().
# NOTE: re-running this cell divides the already scaled columns again.
filtered_hiv_by_year_df = filtered_hiv_by_year_df.rename(
    columns={
        "Urban population": "Urban population x 10^7",
        "Rural population": "Rural population x 10^7",
    }
)
for population_column in ("Rural population x 10^7", "Urban population x 10^7"):
    filtered_hiv_by_year_df[population_column] = (
        filtered_hiv_by_year_df[population_column] / 10**7
    )

filtered_hiv_by_year_df.head()

#### Adds ratio of literacy to urban/rural population

In [None]:
# Divide the adult male literacy rate by each scaled population column and
# store the two quotients as new columns.
literacy_rate = filtered_hiv_by_year_df["Adult male literacy rate"]
filtered_hiv_by_year_df["Literacy to rural population"] = literacy_rate.div(
    filtered_hiv_by_year_df["Rural population x 10^7"]
)
filtered_hiv_by_year_df["Literacy to Urban population"] = literacy_rate.div(
    filtered_hiv_by_year_df["Urban population x 10^7"]
)
filtered_hiv_by_year_df.head(n=5)

### Line Graph

#### Adds ratio of males 19-49 knowledge to population

In [None]:
# Divide the HIV-knowledge column by each scaled population column and store
# the two quotients as new columns.
# The urban column was previously named "Male knowledgeto Urban population"
# (missing space); it now follows the same pattern as the rural column.
# NOTE(review): confirm nothing outside this notebook relies on the old name.
filtered_hiv_by_year_df["Male knowledge to rural population"] = (
    filtered_hiv_by_year_df["Males 19-49 knowledge"]
    / filtered_hiv_by_year_df["Rural population x 10^7"]
)
filtered_hiv_by_year_df["Male knowledge to Urban population"] = (
    filtered_hiv_by_year_df["Males 19-49 knowledge"]
    / filtered_hiv_by_year_df["Urban population x 10^7"]
)
filtered_hiv_by_year_df.sample(5)

#### Graph function

In [None]:
# Draw one line per selected column against the frame's index, using the
# column name as the legend label.
ax: Axes
fig, ax = plt.subplots()

for plotted_column in (
    "Rural population x 10^7",
    "Males 19-49 knowledge",
    "Adult male literacy rate",
):
    ax.plot(
        filtered_hiv_by_year_df.index,
        filtered_hiv_by_year_df[plotted_column],
        label=plotted_column,
    )

ax.legend()
fig.tight_layout()

In [None]:
# Hand the filtered frame to the line-chart helper from helper_func.
hf.create_line_chart(filtered_hiv_by_year_df)

In [None]:
# Plot the two literacy-to-population ratio columns against the frame's index.
# The index holds the transposed column headers of the source data (year labels
# such as "1960"), so the x axis is the year and the y axis is the ratio itself.
# The previous labels ("Adult male literacy rate" on x, "population x 10^7" on
# y) described neither axis.
ax: Axes
fig, ax = plt.subplots()

for ratio_column in ("Literacy to Urban population", "Literacy to rural population"):
    ax.plot(
        filtered_hiv_by_year_df.index,
        filtered_hiv_by_year_df[ratio_column],
        label=ratio_column,
    )

ax.set_xlabel("Year")
ax.set_ylabel("Adult male literacy rate / population x 10^7")
ax.legend()
fig.tight_layout()