In [1]:
from pathlib import Path
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
import sqlite3
from sqlalchemy import create_engine

In [2]:
# Location of the SQLite database file, resolved relative to the working directory.
database_path = Path("./save_the_bees.db")

# Fail early with a clear message: connecting to a missing SQLite file would
# silently create an empty database and the query below would then fail with
# a much less helpful "no such table" error.
if not database_path.exists():
    raise FileNotFoundError(f"SQLite database not found: {database_path}")

engine = create_engine(f"sqlite:///{database_path}")
sql_query = "SELECT * FROM bee_colonies"

# Read the whole bee_colonies table into a DataFrame. The connection is used
# as a context manager so it is closed as soon as the read finishes rather
# than staying open for the lifetime of the kernel.
with engine.connect() as conn:
    df = pd.read_sql(sql_query, conn)

In [3]:
# Keep only the columns listed below (in this order); any other column that
# came back from the query is dropped.
df = df.loc[
    :,
    [
        'state',
        'num_colonies',
        'max_colonies',
        'lost_colonies',
        'percent_lost',
        'added_colonies',
        'renovated_colonies',
        'percent_renovated',
        'quarter',
        'year',
        'varroa_mites',
        'other_pests_and_parasites',
        'diseases',
        'pesticides',
        'other',
        'unknown',
    ],
]
# Display the trimmed frame as the cell output.
df

Unnamed: 0,state,num_colonies,max_colonies,lost_colonies,percent_lost,added_colonies,renovated_colonies,percent_renovated,quarter,year,varroa_mites,other_pests_and_parasites,diseases,pesticides,other,unknown
0,Alabama,7000,7000,1800,26,2800,250,4,1,2015,10.0,5.4,0.0,2.2,9.1,9.4
1,Arizona,35000,35000,4600,13,3400,2100,6,1,2015,26.9,20.5,0.1,0.0,1.8,3.1
2,Arkansas,13000,14000,1500,11,1200,90,1,1,2015,17.6,11.4,1.5,3.4,1.0,1.0
3,California,1440000,1690000,255000,15,250000,124000,7,1,2015,24.7,7.2,3.0,7.5,6.5,2.8
4,Colorado,3500,12500,1500,12,200,140,1,1,2015,14.6,0.9,1.8,0.6,2.6,5.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1448,West Virginia,7500,8000,1100,14,0,220,3,4,2022,33.4,3.8,0.8,0.0,6.4,0.5
1449,Wisconsin,26000,47000,3500,7,140,380,1,4,2022,23.2,21.4,19.4,17.5,9.9,11.7
1450,Wyoming,19500,21000,3200,15,640,0,0,4,2022,22.9,5.9,4.2,0.0,0.0,7.4
1451,Other,30030,30030,480,2,1190,130,0,4,2022,22.4,18.5,0.0,0.0,0.0,0.7


In [5]:
# Build the charting frame from every row whose state is not 'United States'.
df_chart = df.loc[df['state'] != 'United States']
# Display the filtered frame as the cell output.
df_chart

Unnamed: 0,state,num_colonies,max_colonies,lost_colonies,percent_lost,added_colonies,renovated_colonies,percent_renovated,quarter,year,varroa_mites,other_pests_and_parasites,diseases,pesticides,other,unknown
0,Alabama,7000,7000,1800,26,2800,250,4,1,2015,10.0,5.4,0.0,2.2,9.1,9.4
1,Arizona,35000,35000,4600,13,3400,2100,6,1,2015,26.9,20.5,0.1,0.0,1.8,3.1
2,Arkansas,13000,14000,1500,11,1200,90,1,1,2015,17.6,11.4,1.5,3.4,1.0,1.0
3,California,1440000,1690000,255000,15,250000,124000,7,1,2015,24.7,7.2,3.0,7.5,6.5,2.8
4,Colorado,3500,12500,1500,12,200,140,1,1,2015,14.6,0.9,1.8,0.6,2.6,5.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1447,Washington,84000,89000,7500,8,540,220,0,4,2022,34.0,6.4,0.0,0.0,4.2,8.2
1448,West Virginia,7500,8000,1100,14,0,220,3,4,2022,33.4,3.8,0.8,0.0,6.4,0.5
1449,Wisconsin,26000,47000,3500,7,140,380,1,4,2022,23.2,21.4,19.4,17.5,9.9,11.7
1450,Wyoming,19500,21000,3200,15,640,0,0,4,2022,22.9,5.9,4.2,0.0,0.0,7.4


In [6]:
# Further restrict the charting frame to rows whose state is not 'Other'.
df_chart = df_chart.loc[df_chart['state'] != 'Other']
# Display the filtered frame as the cell output.
df_chart

Unnamed: 0,state,num_colonies,max_colonies,lost_colonies,percent_lost,added_colonies,renovated_colonies,percent_renovated,quarter,year,varroa_mites,other_pests_and_parasites,diseases,pesticides,other,unknown
0,Alabama,7000,7000,1800,26,2800,250,4,1,2015,10.0,5.4,0.0,2.2,9.1,9.4
1,Arizona,35000,35000,4600,13,3400,2100,6,1,2015,26.9,20.5,0.1,0.0,1.8,3.1
2,Arkansas,13000,14000,1500,11,1200,90,1,1,2015,17.6,11.4,1.5,3.4,1.0,1.0
3,California,1440000,1690000,255000,15,250000,124000,7,1,2015,24.7,7.2,3.0,7.5,6.5,2.8
4,Colorado,3500,12500,1500,12,200,140,1,1,2015,14.6,0.9,1.8,0.6,2.6,5.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1446,Virginia,9000,10500,1300,12,300,750,7,4,2022,43.3,20.9,0.0,0.0,6.4,3.9
1447,Washington,84000,89000,7500,8,540,220,0,4,2022,34.0,6.4,0.0,0.0,4.2,8.2
1448,West Virginia,7500,8000,1100,14,0,220,3,4,2022,33.4,3.8,0.8,0.0,6.4,0.5
1449,Wisconsin,26000,47000,3500,7,140,380,1,4,2022,23.2,21.4,19.4,17.5,9.9,11.7


In [7]:
# Write the charting frame to CSV in the working directory; the DataFrame
# index is left out of the file.
df_chart.to_csv(
    path_or_buf='save_the_bees_charting.csv',
    index=False,
)