In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go


In [2]:
# Load the corn dataset from the notebook-relative datasets folder.
corn_csv = "datasets/corn.csv"
df = pd.read_csv(corn_csv)

# Show every row except the last two (same result as head(-2)); this is not a short preview.
df.iloc[:-2]

Unnamed: 0,YEAR,LOC,STATE,COUNTY_CITY,BRAND,NAME,PCODE,YIELD,WATER_REGIME,MOIST,TW,DAYS
0,1982,DND,KS,DONIPHAN,ASGROW,RX 90,992,127,Dryland,17,57,
1,1982,DND,KS,DONIPHAN,FUNK'S,G-4578,1661,135,Dryland,17,58,
2,1982,DND,KS,DONIPHAN,FUNK'S,G-4673A,1658,110,Dryland,19,57,
3,1982,DND,KS,DONIPHAN,FONTANELLE,680,1656,135,Dryland,21,58,
4,1982,DND,KS,DONIPHAN,FONTANELLE,690,1655,126,Dryland,20,56,
...,...,...,...,...,...,...,...,...,...,...,...,...
25250,2023,,KS,FRANKLIN,INDIGO,CONTROL,5784,161,Dryland,13,60,0.0
25251,2023,,KS,FRANKLIN,LEWIS,17DP651,5780,170,Dryland,14,59,0.0
25252,2023,,KS,FRANKLIN,LEWIS,11DT912,5801,145,Dryland,13,59,0.0
25253,2023,,KS,FRANKLIN,NK,NK1188-AA,5812,160,Dryland,13,59,0.0


In [51]:
# Column-name shorthands; col2 is reused as the facet column in the plotting cell.
col1, col2 = 'YEAR', 'NAME'

# NOTE(review): the CSV preview lists a COUNTY_CITY column, not COUNTY, and the
# execution counter jumps from 2 to 51. This cell presumably depends on a rename
# performed in a cell that is not shown; confirm before running on a fresh kernel.

# Keep only the 2019 rows.
is_2019 = df["YEAR"] == 2019
df = df[is_2019]

# Mean YIELD per COUNTY over all 2019 rows, taken before the NAME filter below.
avg = df.groupby("COUNTY")["YIELD"].mean()

# Narrow to the two NAME values of interest and the four columns used downstream.
selected_names = ["150PRX", "349PR"]
kept_columns = [col2, 'WATER_REGIME', 'COUNTY', 'YIELD']
df = df.loc[df[col2].isin(selected_names), kept_columns]

In [52]:
# Display the filtered frame (NAME, WATER_REGIME, COUNTY, YIELD) for inspection.
df

Unnamed: 0,NAME,WATER_REGIME,COUNTY,YIELD
19371,349PR,Dryland,Pottawatomie,171.16025
19379,349PR,Dryland,Republic,196.333333
19406,150PRX,Dryland,Franklin,127.990069
19411,150PRX,Dryland,Labette,128.904078
19445,349PR,Dryland,Riley,167.828825


In [53]:
all_names = df['NAME'].unique()
all_counties = df['COUNTY'].unique()

# Scaffold with one row per (NAME, COUNTY) combination, NAME varying slowest,
# so that pairs with no row in df still appear in the result.
new_df = pd.MultiIndex.from_product(
    [all_names, all_counties], names=['NAME', 'COUNTY']
).to_frame(index=False)

# Left-merge the observed rows onto the scaffold; unmatched pairs come back as NaN.
result_df = pd.merge(new_df, df, on=['NAME', 'COUNTY'], how='left')[
    ['NAME', 'COUNTY', 'YIELD', 'WATER_REGIME']]

# Missing yields count as 0 when taking the maximum, so yield_max stays finite
# even if every yield is missing.
yield_max = result_df['YIELD'].fillna(0).max()

# Fill only values that are actually missing. Using 0 as an intermediate sentinel
# would also overwrite a genuinely recorded YIELD of 0 with the placeholder.
# The placeholder height is yield_max + 50, i.e. taller than any recorded yield.
result_df = result_df.fillna({'YIELD': yield_max + 50,
                              'WATER_REGIME': "no_record"})

df = result_df
df

Unnamed: 0,NAME,COUNTY,YIELD,WATER_REGIME
0,349PR,Pottawatomie,171.16025,Dryland
1,349PR,Republic,196.333333,Dryland
2,349PR,Franklin,246.333333,no_record
3,349PR,Labette,246.333333,no_record
4,349PR,Riley,167.828825,Dryland
5,150PRX,Pottawatomie,246.333333,no_record
6,150PRX,Republic,246.333333,no_record
7,150PRX,Franklin,127.990069,Dryland
8,150PRX,Labette,128.904078,Dryland
9,150PRX,Riley,246.333333,no_record


In [54]:
# Convert the per-county mean Series into a two-column frame (COUNTY, avg).
# The isinstance guard makes the cell safe to re-run: once avg is already a
# DataFrame, a second reset_index() would add a third column and the columns
# assignment below would fail with a length mismatch.
if isinstance(avg, pd.Series):
    avg = pd.DataFrame(avg).reset_index()
    avg.columns = ['COUNTY', 'avg']
avg

Unnamed: 0,COUNTY,avg
0,Doniphan,234.972879
1,Ellis,71.277996
2,Franklin,156.828113
3,Labette,140.777457
4,Pottawatomie,179.729713
5,Republic,175.72807
6,Riley,189.147318
7,Saline,194.8443
8,Shawnee,227.54886
9,Thomas,138.518967


In [56]:
# Bar colour per WATER_REGIME value; 'no_record' marks placeholder rows.
color_map = {'Irrigated': 'darkblue', 'Dryland': 'orange', 'no_record': '#343541'}

# One facet per value of col2, bars per county coloured by water regime.
# `labels` only renames columns, so it holds column names exclusively.
fig = px.bar(df, x='COUNTY', y='YIELD',
             color_discrete_map=color_map,
             facet_col=col2,
             color='WATER_REGIME', barmode='group',
             labels={'NAME': 'Name', 'YIELD': 'Yield',
                     'WATER_REGIME': 'Water Regime', 'YEAR': 'Year',
                     'COUNTY': 'County'})

# 'no_record' is a data value, not a column, so `labels` cannot rename it;
# rename the corresponding traces so the legend reads "No Record".
fig.update_traces(name="No Record", selector=dict(name="no_record"))

# Facet titles arrive as "Name=<value>"; keep only the value.
fig.for_each_annotation(lambda a: a.update(
    text=a.text.replace("Name=", ""))
)
# Drop the per-facet x-axis titles.
fig.for_each_xaxis(lambda x: x.update({'title': ''}))

# Per-county averages, restricted to the counties that are actually plotted.
county_avgs = avg[avg['COUNTY'].isin(df['COUNTY'].unique())]

# A single scatter trace carries every county's average into every facet.
fig.add_trace(px.scatter(county_avgs, x='COUNTY', y='avg').data[0],
              row='all', col='all')

# Highlight each average with a zero-width 'circle' spanning avg +/- 1 and
# drawn with a 250px-wide violet outline.
# NOTE(review): presumably meant to render as a thick horizontal marker; confirm visually.
for county, county_avg in zip(county_avgs['COUNTY'], county_avgs['avg']):
    fig.add_shape(
        type='circle',
        x0=county,
        x1=county,
        y0=county_avg - 1,
        y1=county_avg + 1,
        line=dict(color='violet', width=250),
        row="all",
        col="all"
    )

# Cap every facet's y-axis just above the tallest recorded yield. Rows whose
# YIELD exceeds yield_max + 10 (the placeholder rows) run off the top of the plot.
fig.update_yaxes(range=[0, yield_max + 10])

fig.show()