In [196]:
from typing import NamedTuple, List
import csv
import pandas as pd

-- Reset the schema: drop every table, together with any constraints that
-- reference it, so the CREATE TABLE statements below start from scratch.
DROP TABLE MonitorStation CASCADE CONSTRAINTS;
DROP TABLE Bee CASCADE CONSTRAINTS;
DROP TABLE Detect CASCADE CONSTRAINTS;
DROP TABLE GasConditions CASCADE CONSTRAINTS;
DROP TABLE Influence CASCADE CONSTRAINTS;
DROP TABLE Monitor CASCADE CONSTRAINTS;
DROP TABLE Kill CASCADE CONSTRAINTS;
DROP TABLE RiskFactors CASCADE CONSTRAINTS;
DROP TABLE Parasite CASCADE CONSTRAINTS;
DROP TABLE Pesticide CASCADE CONSTRAINTS;

-- One average-temperature value per station location (centroid) and year.
-- NOTE(review): on Oracle a bare DECIMAL may default to scale 0 (whole numbers
-- only); confirm that fractional coordinates/temperatures survive the insert.
CREATE TABLE MonitorStation (
    CentroidLongitude DECIMAL,
    CentroidLatitude DECIMAL,
    Year INTEGER,
    AverageTemperature DECIMAL,
    PRIMARY KEY (CentroidLongitude, CentroidLatitude, Year)  -- a location appears at most once per year
);


-- Colony counts and loss/renovation percentages, one row per state and year.
-- NOTE(review): on Oracle a bare DECIMAL may default to scale 0 (whole numbers
-- only); confirm that fractional percentages survive the insert.
CREATE TABLE Bee (
    State VARCHAR(255),
    Year INTEGER,
    MaxColony INTEGER,
    LostColony INTEGER,
    PercentLost DECIMAL,
    Colony INTEGER,
    AddColony INTEGER,
    PercentRenovated DECIMAL,
    PercentLostByDisease DECIMAL,
    PRIMARY KEY (State, Year)
);

-- Relationship table pairing a station (longitude, latitude, year) with a Bee row.
-- NOTE(review): only the Bee side is enforced by a foreign key; the station
-- columns carry no reference to MonitorStation -- confirm that is intentional.
CREATE TABLE Detect (
    CentroidLongitude DECIMAL,
    CentroidLatitude DECIMAL,
    StationYear INTEGER,
    BeeState VARCHAR(255),
    BeeYear INTEGER,
    PRIMARY KEY (CentroidLongitude, CentroidLatitude, StationYear, BeeState, BeeYear),
    FOREIGN KEY (BeeState, BeeYear) REFERENCES Bee(State, Year)
);

-- One measurement row per gas name, state and year.
-- NOTE(review): on Oracle a bare DECIMAL may default to scale 0 (whole numbers
-- only); confirm that fractional MeanValue/AverageAQI values survive the insert.
CREATE TABLE GasConditions (
    Name VARCHAR(255),
    State VARCHAR(255),
    Year INTEGER,
    MeanValue DECIMAL,
    AverageAQI DECIMAL,
    PRIMARY KEY (Name, State, Year)
);

-- Relationship table linking a gas-pollutant reading (year, state, pollutant
-- name) to the Bee row it affects.
-- The pollutant name is part of the key because the generated INSERT
-- statements write one row per pollutant for the same state/year pair.
CREATE TABLE Influence (
    GasPollutantsYearAffected INTEGER,
    GasPollutantsStateAffected VARCHAR(255),
    GasPollutantsName VARCHAR(255),
    BeeState VARCHAR(255),
    BeeYear INTEGER,
    PRIMARY KEY (GasPollutantsYearAffected, GasPollutantsStateAffected, GasPollutantsName, BeeState, BeeYear),
    FOREIGN KEY (BeeState, BeeYear) REFERENCES Bee(State, Year)
);

-- One named risk factor per state and year (the key allows a single Name per pair).
CREATE TABLE RiskFactors (
    State VARCHAR(255),
    Year INTEGER,
    Name VARCHAR(255),
    PRIMARY KEY (State, Year)
);

-- Relationship table linking a MonitorStation row to the RiskFactors row it reports.
-- The primary key covers only the station columns, so each station/year can be
-- linked to at most one RiskFactors row.
CREATE TABLE Monitor (
    CentroidLongitude DECIMAL,
    CentroidLatitude DECIMAL,
    StationYear INTEGER,
    RiskFactorsReportedYear INTEGER,
    RiskFactorsReportedState VARCHAR(255),
    PRIMARY KEY (CentroidLongitude, CentroidLatitude, StationYear),
    FOREIGN KEY (RiskFactorsReportedYear, RiskFactorsReportedState) REFERENCES RiskFactors(Year, State),
    FOREIGN KEY (CentroidLongitude, CentroidLatitude, StationYear) REFERENCES MonitorStation(CentroidLongitude, CentroidLatitude, Year)
);


-- Relationship table linking a Bee row to a RiskFactors row; both sides are
-- enforced by foreign keys.
CREATE TABLE Kill (
    BeeState VARCHAR(255),
    BeeYear INTEGER,
    RiskFactorsReportedYear INTEGER,
    RiskFactorsReportedState VARCHAR(255),
    PRIMARY KEY (BeeState, BeeYear, RiskFactorsReportedYear, RiskFactorsReportedState),
    FOREIGN KEY (BeeState, BeeYear) REFERENCES Bee(State, Year),
    FOREIGN KEY (RiskFactorsReportedYear, RiskFactorsReportedState) REFERENCES RiskFactors(Year, State)
);


-- Percentage affected by parasites, one row per year and state.
-- NOTE(review): on Oracle a bare DECIMAL may default to scale 0 (whole numbers
-- only); confirm that fractional percentages survive the insert.
CREATE TABLE Parasite (
    Year INTEGER,
    State VARCHAR(255),
    PercentAffected DECIMAL,
    PRIMARY KEY (Year, State)
);

-- Low/high pesticide estimates, one row per year and state.
-- NOTE(review): on Oracle a bare DECIMAL may default to scale 0 (whole numbers
-- only); confirm that fractional estimates survive the insert.
CREATE TABLE Pesticide (
    Year INTEGER,
    State VARCHAR(255),
    LowEstimate DECIMAL,
    HighEstimate DECIMAL,
    PRIMARY KEY (Year, State)
);


In [197]:
# Load every raw CSV into its own DataFrame for inspection; paths are relative
# to the notebook's working directory. The generator functions further down
# re-read these same files directly rather than using these frames.
data1 = pd.read_csv("data/average_monthly_temperature_by_state_1950-2022.csv")  # temperatures + state centroids
data2 = pd.read_csv("data/epest_county_estimates.csv")  # input of create_sql_Pesticide
data3 = pd.read_csv("data/save_the_bees.csv")  # input of create_sql_Bee / create_sql_Parasite
data4 = pd.read_csv("data/pollution_2000_2021.csv")  # input of create_sql_GasConditions
data5 = pd.read_csv("data/helper.csv")  # input of create_sql_RiskFactors

In [198]:
# Show the temperature DataFrame as the cell output to check its columns.
data1

Unnamed: 0.1,Unnamed: 0,year,state,average_temp,centroid_lon,centroid_lat
0,0,2015,Alabama,64.325000,-86.828372,32.789832
1,1,2015,Arizona,61.900000,-111.664418,34.293110
2,2,2015,Arkansas,61.141667,-92.439268,34.899745
3,3,2015,California,60.958333,-119.610699,37.246071
4,4,2015,Colorado,47.550000,-105.547825,38.998552
...,...,...,...,...,...,...
170,170,2019,Texas,65.816667,-99.350697,31.484464
171,171,2019,Utah,47.800000,-111.678216,39.323795
172,172,2019,Virginia,57.383333,-78.812254,37.515024
173,173,2019,Washington,46.525000,-120.446866,47.380969


In [199]:
# def create_sql_MonitorStation():
#     data_MonitorStation = data1[["centroid_lon", "centroid_lat", "year", "

In [200]:
def create_sql_MonitorStation(input_csv_file, output_sql_file):
    """Generate MonitorStation INSERT statements from the temperature CSV.

    The header row is skipped. Each data row is read positionally: column 1 is
    the year, column 3 the average temperature, column 4 the longitude and
    column 5 the latitude. Only the first row seen for each
    (longitude, latitude, year) triple produces a statement.

    Args:
        input_csv_file: Path of the CSV file to read.
        output_sql_file: Path of the SQL file to create; it is opened in write
            mode, so any previous content is discarded.

    Any exception raised while reading or writing is caught and reported with
    ``print`` instead of propagating.
    """
    try:
        with open(input_csv_file, 'r', newline='') as source, open(output_sql_file, 'w') as sink:
            records = csv.reader(source)
            next(records)  # discard the header row
            seen_stations = set()

            for record in records:
                lon = float(record[4])
                lat = float(record[5])
                yr = int(record[1])
                avg_temp = float(record[3])

                key = (lon, lat, yr)
                if key in seen_stations:
                    # A statement for this station/year was already written.
                    continue

                seen_stations.add(key)
                sink.write(
                    "INSERT INTO MonitorStation (CentroidLongitude, CentroidLatitude, Year, AverageTemperature) "
                    f"VALUES ({lon}, {lat}, {yr}, {avg_temp});\n"
                )
    except Exception as e:
        print(f"An error occurred: {e}")

In [201]:
# First generator in the chain: it opens output.sql in write mode, so the file starts fresh here.
create_sql_MonitorStation(
    input_csv_file="data/average_monthly_temperature_by_state_1950-2022.csv",
    output_sql_file="output.sql",
)

In [202]:
def create_sql_Bee(input_csv_file3, output_sql_file):
    """Append one Bee INSERT statement per distinct (state, year) in the CSV.

    The header row is skipped. Columns are read positionally: 1 = state,
    2 = year, 3 = colony count, 4 = max colonies, 5 = lost colonies,
    6 = percent lost, 7 = added colonies, 9 = percent renovated,
    11 = percent lost by disease. Only the first row seen for each
    (state, year) pair is written, matching the Bee primary key.

    Args:
        input_csv_file3: Path of the bee CSV file to read.
        output_sql_file: Path of the SQL file to append to.
    """
    with open(input_csv_file3, newline='') as csvfile, open(output_sql_file, mode='a') as outfile:
        reader = csv.reader(csvfile)
        next(reader, None)  # skip the header; an empty file simply yields no rows
        seen = set()  # set membership keeps the duplicate check O(1) per row

        for row in reader:
            # Double any single quote so the value stays a valid SQL string
            # literal (same escaping the other generators in this notebook use).
            state = row[1].replace("'", "''")
            year = row[2]
            num_colony = row[3]
            max_colony = row[4]
            lost_colony = row[5]
            pct_lost = row[6]
            add_colony = row[7]
            pct_renovated = row[9]
            pct_lost_by_disease = row[11]

            key = (state, year)
            if key in seen:
                continue
            seen.add(key)

            statement = f"INSERT INTO Bee (State, Year, Colony, MaxColony, LostColony, PercentLost, AddColony, PercentRenovated, PercentLostByDisease) VALUES ('{state}', {year}, {num_colony}, {max_colony}, {lost_colony}, {pct_lost}, {add_colony}, {pct_renovated}, {pct_lost_by_disease});\n"
            outfile.write(statement)


In [203]:
# Append the Bee rows to the SQL script.
create_sql_Bee(
    input_csv_file3="data/save_the_bees.csv",
    output_sql_file="output.sql",
)

In [204]:
def create_sql_detect(monitor_csv_file, bee_csv_file, output_sql_file):
    """Append Detect INSERT statements pairing stations with bee rows by year.

    Every distinct station (``centroid_lon``, ``centroid_lat``, ``year``) from
    the monitor CSV is paired with every distinct (``state``, ``year``) from
    the bee CSV that has the same year. Both inputs are read by header name.
    Repeated input rows are collapsed so each pairing is written once, which
    keeps the output compatible with the Detect primary key.

    Args:
        monitor_csv_file: Path of the station/temperature CSV file.
        bee_csv_file: Path of the bee CSV file.
        output_sql_file: Path of the SQL file to append to.

    Raises:
        KeyError: If an expected column header is missing from either CSV.
    """
    # Distinct stations, kept in first-seen order so the output order is stable.
    stations = []
    seen_stations = set()
    with open(monitor_csv_file, 'r', newline='') as csvfile:
        for row in csv.DictReader(csvfile):
            station = (row['centroid_lon'], row['centroid_lat'], row['year'])
            if station not in seen_stations:
                seen_stations.add(station)
                stations.append(station)

    # Distinct bee states grouped by year, so matching is a dictionary lookup
    # instead of a scan over every bee row for every station.
    states_by_year = {}
    seen_bees = set()
    with open(bee_csv_file, 'r', newline='') as csvfile:
        for row in csv.DictReader(csvfile):
            bee = (row['state'], row['year'])
            if bee not in seen_bees:
                seen_bees.add(bee)
                states_by_year.setdefault(row['year'], []).append(row['state'])

    with open(output_sql_file, 'a') as sqlfile:
        for lon, lat, year in stations:
            for state in states_by_year.get(year, ()):
                # Double single quotes so the state is a valid SQL string literal.
                safe_state = state.replace("'", "''")
                statement = f"INSERT INTO Detect (CentroidLongitude, CentroidLatitude, StationYear, BeeState, BeeYear) VALUES ({lon}, {lat}, {year}, '{safe_state}', {year});\n"
                sqlfile.write(statement)

In [205]:
# Append the Detect rows (stations paired with bee rows) to the SQL script.
create_sql_detect(
    monitor_csv_file="data/average_monthly_temperature_by_state_1950-2022.csv",
    bee_csv_file="data/save_the_bees.csv",
    output_sql_file="output.sql",
)

In [206]:
def create_sql_GasConditions(input_csv_file4, output_sql_file):
    """Append one GasConditions INSERT per distinct (name, state, year).

    The header row is skipped. Columns are read positionally: 1 = year,
    2 = state, 3 = gas name, 4 = average AQI, 5 = mean value. Only the first
    row seen for each (name, state, year) is written, because that triple is
    the GasConditions primary key and repeated rows would be rejected.

    Args:
        input_csv_file4: Path of the pollution CSV file to read.
        output_sql_file: Path of the SQL file to append to.
    """
    with open(input_csv_file4, newline='') as csvfile, open(output_sql_file, mode='a') as outfile:
        reader = csv.reader(csvfile)
        next(reader, None)  # skip the header; an empty file simply yields no rows
        seen = set()

        for row in reader:
            # Double single quotes so the values are valid SQL string literals.
            name = row[3].replace("'", "''")
            state = row[2].replace("'", "''")
            year = row[1]
            mean_value = row[5]
            average_aqi = row[4]

            key = (name, state, year)
            if key in seen:
                continue
            seen.add(key)

            statement = f"INSERT INTO GasConditions (Name, State, Year, MeanValue, AverageAQI) VALUES ('{name}', '{state}', {year}, {mean_value}, {average_aqi});\n"
            outfile.write(statement)

In [207]:
# Append the GasConditions rows to the SQL script.
create_sql_GasConditions(
    input_csv_file4="data/pollution_2000_2021.csv",
    output_sql_file="output.sql",
)

In [208]:
def create_sql_Influence(gas_csv_file, bee_csv_file, output_sql_file):
    """Append Influence INSERT statements linking gas readings to bee rows.

    For every distinct (year, state, pollutant) in the gas CSV, one statement
    is written when the bee CSV contains a row with the same state and year.
    Both files are read positionally after skipping the header: the gas file
    uses 1 = year, 2 = state, 3 = pollutant; the bee file uses 1 = state,
    2 = year. Repeated bee rows for a state/year do not repeat the statement.

    The statements write a ``GasPollutantsName`` column, so the Influence
    table must define that column.

    Args:
        gas_csv_file: Path of the pollution CSV file.
        bee_csv_file: Path of the bee CSV file.
        output_sql_file: Path of the SQL file to append to.
    """
    with open(bee_csv_file, 'r', newline='') as csvfile:
        reader = csv.reader(csvfile)
        next(reader, None)  # skip the header
        # A set collapses repeated (state, year) rows so each link is emitted once.
        bee_keys = {(row[1], row[2]) for row in reader}

    with open(gas_csv_file, 'r', newline='') as gasfile, open(output_sql_file, mode='a') as outfile:
        gas_reader = csv.reader(gasfile)
        next(gas_reader, None)  # skip the header
        seen = set()
        for row in gas_reader:
            gas_year, gas_state, pollutant = row[1], row[2], row[3]

            key = (gas_year, gas_state, pollutant)
            if key in seen:
                continue
            seen.add(key)

            # Match on the raw values; escaping happens only when formatting,
            # so states containing an apostrophe still find their bee row.
            if (gas_state, gas_year) not in bee_keys:
                continue

            state_sql = gas_state.replace("'", "''")
            pollutant_sql = pollutant.replace("'", "''")
            statement = f"INSERT INTO Influence (GasPollutantsYearAffected, GasPollutantsStateAffected, BeeState, BeeYear, GasPollutantsName) VALUES ({gas_year}, '{state_sql}', '{state_sql}', {gas_year}, '{pollutant_sql}');\n"
            outfile.write(statement)

In [209]:
# Append the Influence rows (gas readings linked to bee rows) to the SQL script.
create_sql_Influence(
    gas_csv_file="data/pollution_2000_2021.csv",
    bee_csv_file="data/save_the_bees.csv",
    output_sql_file="output.sql",
)

In [210]:
def create_sql_RiskFactors(input_csv_file, output_sql_file):
    """Append one RiskFactors INSERT statement per distinct (state, year).

    The header row is skipped. Columns are read positionally: 1 = name,
    2 = year, 3 = state. Single quotes in the state and name are doubled for
    SQL. When a (state, year) pair repeats, only its first row is written.

    Args:
        input_csv_file: Path of the CSV file to read.
        output_sql_file: Path of the SQL file to append to.
    """
    with open(input_csv_file, 'r') as source, open(output_sql_file, mode='a') as sink:
        records = csv.reader(source)
        next(records)  # discard the header row
        already_written = set()

        for record in records:
            state_sql = record[3].replace("'", "''")
            year_text = record[2]
            name_sql = record[1].replace("'", "''")

            pair = (state_sql, year_text)
            if pair in already_written:
                continue

            already_written.add(pair)
            sink.write(
                f"INSERT INTO RiskFactors (State, Year, Name) VALUES ('{state_sql}', {year_text}, '{name_sql}');\n"
            )

In [211]:
# Append the RiskFactors rows to the SQL script.
create_sql_RiskFactors(
    input_csv_file="data/helper.csv",
    output_sql_file="output.sql",
)

In [212]:
def create_sql_Monitor(monitor_csv_file, risk_factors_csv_file, output_sql_file):
    """Append Monitor INSERT statements linking stations to risk-factor rows.

    A station row matches a risk-factor row when both have the same year and
    state. Both files are read positionally after skipping the header: the
    monitor file uses 1 = year, 2 = state, 4 = longitude, 5 = latitude; the
    risk-factor file uses 2 = year, 3 = state (both stripped of surrounding
    whitespace). Each (longitude, latitude, year) is written at most once,
    because that triple is the Monitor primary key.

    Args:
        monitor_csv_file: Path of the station/temperature CSV file.
        risk_factors_csv_file: Path of the risk-factor CSV file.
        output_sql_file: Path of the SQL file to append to.
    """
    # Group station coordinates by (year, state) so each risk-factor row is
    # matched with a dictionary lookup rather than a scan of every station.
    stations_by_key = {}
    with open(monitor_csv_file, 'r', newline='') as csvfile:
        reader = csv.reader(csvfile)
        next(reader, None)  # skip the header
        for row in reader:
            centroid_long, centroid_lat, year, state = row[4], row[5], row[1], row[2]
            stations_by_key.setdefault((year, state), []).append((centroid_long, centroid_lat))

    with open(risk_factors_csv_file, 'r', newline='') as csvfile, open(output_sql_file, mode='a') as outfile:
        reader = csv.reader(csvfile)
        next(reader, None)  # skip the header
        seen_risks = set()
        written_stations = set()
        for row in reader:
            risk_state = row[3].strip()
            risk_year = row[2].strip()

            risk_key = (risk_state, risk_year)
            if risk_key in seen_risks:
                continue
            seen_risks.add(risk_key)

            # Compare raw state names; escape only when building the literal.
            state_sql = risk_state.replace("'", "''")
            for centroid_long, centroid_lat in stations_by_key.get((risk_year, risk_state), ()):
                station = (centroid_long, centroid_lat, risk_year)
                if station in written_stations:
                    continue  # repeated station row; would violate the primary key
                written_stations.add(station)
                statement = f"INSERT INTO Monitor (CentroidLongitude, CentroidLatitude, StationYear, RiskFactorsReportedYear, RiskFactorsReportedState) VALUES ({centroid_long}, {centroid_lat}, {risk_year}, {risk_year}, '{state_sql}');\n"
                outfile.write(statement)

In [213]:
# Append the Monitor rows (stations linked to risk factors) to the SQL script.
create_sql_Monitor(
    monitor_csv_file="data/average_monthly_temperature_by_state_1950-2022.csv",
    risk_factors_csv_file="data/helper.csv",
    output_sql_file="output.sql",
)

In [214]:
def create_sql_Kill(bee_csv_file, risk_factors_csv_file, output_sql_file):
    """Append Kill INSERT statements linking bee rows to risk-factor rows.

    One statement is written for every distinct (state, year) of the
    risk-factor file that also occurs in the bee file. Both files are read
    positionally after skipping the header: the bee file uses 1 = state,
    2 = year; the risk-factor file uses 1 = year, 2 = state (both stripped of
    surrounding whitespace). Repeated bee rows do not repeat the statement.

    NOTE(review): the risk-factor column positions used here (1 = year,
    2 = state) differ from the ones create_sql_RiskFactors reads
    (2 = year, 3 = state); confirm which input file this function is meant
    to receive.

    Args:
        bee_csv_file: Path of the bee CSV file.
        risk_factors_csv_file: Path of the risk-factor CSV file.
        output_sql_file: Path of the SQL file to append to.
    """
    # Bee data: a set collapses repeated (state, year) rows so each link is
    # written once, as the Kill primary key requires.
    with open(bee_csv_file, 'r', newline='') as csvfile:
        reader = csv.reader(csvfile)
        next(reader, None)  # skip the header
        bee_keys = {(row[1].replace("'", "''"), row[2]) for row in reader}

    # RiskFactors data
    with open(risk_factors_csv_file, 'r', newline='') as csvfile, open(output_sql_file, mode='a') as outfile:
        reader = csv.reader(csvfile)
        next(reader, None)  # skip the header
        seen = set()
        for row in reader:
            risk_state = row[2].strip().replace("'", "''")
            risk_year = row[1].strip()

            key = (risk_state, risk_year)
            if key in seen:
                continue
            seen.add(key)

            # Both sides hold SQL-escaped state names, so equality is consistent.
            if key in bee_keys:
                statement = f"INSERT INTO Kill (BeeState, BeeYear, RiskFactorsReportedYear, RiskFactorsReportedState) VALUES ('{risk_state}', {risk_year}, {risk_year}, '{risk_state}');\n"
                outfile.write(statement)

In [215]:
# Append the Kill rows to the SQL script.
# NOTE(review): the pollution CSV is passed as the risk-factor input here, while
# the RiskFactors table itself is generated from data/helper.csv -- confirm
# this is the intended file.
create_sql_Kill(
    bee_csv_file="data/save_the_bees.csv",
    risk_factors_csv_file="data/pollution_2000_2021.csv",
    output_sql_file="output.sql",
)

In [216]:
#state year %affected
def create_sql_Parasite(input_csv_file, output_sql_file):
    """Append one Parasite INSERT statement per distinct (year, state).

    The header row is skipped. Columns are read positionally: 1 = state,
    2 = year, and the second-to-last column is the percentage affected.
    Single quotes in the state are doubled for SQL. When a (year, state) pair
    repeats, only its first row is written.

    Args:
        input_csv_file: Path of the CSV file to read.
        output_sql_file: Path of the SQL file to append to.
    """
    with open(input_csv_file, 'r') as source, open(output_sql_file, mode='a') as sink:
        records = csv.reader(source)
        next(records)  # discard the header row
        already_written = set()

        for record in records:
            year_text = record[2]
            state_sql = record[1].replace("'", "''")
            pct_affected = record[-2]

            pair = (year_text, state_sql)
            if pair in already_written:
                continue

            already_written.add(pair)
            sink.write(
                f"INSERT INTO Parasite (Year, State, PercentAffected) VALUES ({year_text}, '{state_sql}', {pct_affected});\n"
            )

In [217]:
# Append the Parasite rows to the SQL script.
create_sql_Parasite(
    input_csv_file="data/save_the_bees.csv",
    output_sql_file="output.sql",
)

In [218]:
def create_sql_Pesticide(input_csv_file, output_sql_file):
    """Append one Pesticide INSERT statement per distinct (year, state).

    The header row is skipped. Columns are read positionally: 2 = year,
    3 = low estimate, 4 = high estimate, 5 = state. Single quotes in the
    state are doubled for SQL. When a (year, state) pair repeats, only its
    first row is written; later rows for the pair are ignored.

    Args:
        input_csv_file: Path of the CSV file to read.
        output_sql_file: Path of the SQL file to append to.
    """
    with open(input_csv_file, 'r') as source, open(output_sql_file, mode='a') as sink:
        records = csv.reader(source)
        next(records)  # discard the header row
        already_written = set()

        for record in records:
            year_text = record[2]
            state_sql = record[5].replace("'", "''")
            low = record[3]
            high = record[4]

            pair = (year_text, state_sql)
            if pair in already_written:
                continue

            already_written.add(pair)
            sink.write(
                f"INSERT INTO Pesticide (Year, State, LowEstimate, HighEstimate) VALUES ({year_text}, '{state_sql}', {low}, {high});\n"
            )

In [219]:
# Append the Pesticide rows to the SQL script.
create_sql_Pesticide(
    input_csv_file="data/epest_county_estimates.csv",
    output_sql_file="output.sql",
)