<a href="https://colab.research.google.com/github/Bourbon-Rye/Baesian-Cropability/blob/main/PilipiNuts_2023_Baesian_Cropability.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Setup

In [2]:

# Minimal Setup
import numpy as np
import pandas as pd
import geopandas as gpd

import matplotlib as mpl
import matplotlib.font_manager as fm
import matplotlib.pyplot as plt
import seaborn as sns

import datetime

import plotly.express as px
import plotly.graph_objects as go
import json

# Baesian Plots


## Yenzy Plots

In [4]:
# @title Rice Price and Cropyield Scatterplot (Annual)
# Scatter of national palay production volume against the national retail
# price of well-milled rice, one point per year.

# Volume produced: keep only the national palay row.
df1 = pd.read_csv("datasets/agricultural-indicators/Volume_Rice and Corn_annual.csv", skiprows=2)
df1 = df1[(df1["Ecosystem/Croptype"] == "Palay") & (df1["Geolocation"] == "PHILIPPINES")]


def renamer(s: str):
    """Normalise one column label of the volume dataset.

    "Ecosystem/Croptype" becomes "Commodity"; a label containing "Annual"
    is reduced to its first whitespace-separated word (presumably the year);
    any other label is returned unchanged.
    """
    if s == "Ecosystem/Croptype":
        return "Commodity"
    elif "Annual" in s:
        return s.split()[0]
    else:
        return s


df1 = df1.rename(renamer, axis=1)

# Retail prices: collapse every year's columns into a single "<year>" column.
df2 = pd.read_csv("datasets/prices/precleaned/prices_retail_2012-2023.csv")
for year in range(2012, 2024):
    # filter() already returns only this year's columns, so all of them are
    # averaged. (The previous `.iloc[:, 2:]` skipped the first two of them,
    # which left the first two periods out of the yearly mean.)
    df2[str(year)] = df2.filter(regex=str(year), axis=1).mean(axis=1)
    # Remove the per-period columns ("<year> ..."); the trailing space in the
    # pattern keeps the freshly added "<year>" column.
    df2 = df2.drop(columns=df2.filter(regex=f"{year} ", axis=1).columns)

# First column (expected to be "Commodity" after renaming, which set_index
# below relies on) plus the year columns from 2012 onwards.
df1 = pd.concat([df1.iloc[:, 0], df1.loc[:, "2012":]], axis=1)

# National well-milled rice row, without its first column.
df2 = df2[(df2.Commodity == "RICE, WELL-MILLED, 1 KG") & (df2.Geolocation == "PHILIPPINES")].iloc[:, 1:]

# Stack the two rows, then transpose so that each year becomes one record.
df = pd.concat([df1, df2])
df = df.set_index("Commodity").T
df = df.reset_index(names='Year')
df.columns = ["Year", "Volume Produced", "Retail Price"]

# NOTE(review): the title says 2012-2018 while the loop above covers
# 2012-2023 — confirm which range is intended.
fig = px.scatter(df, x="Volume Produced", y="Retail Price",
                  hover_data=['Year'], text="Year", title="Rice: Volume Produced vs Retail Price (2012-2018)<br>National averages")
fig.update_traces(textposition='top center')
fig.show()

In [None]:
# @title Rice Price and Cropyield Scatterplot (Quarterly)
# Scatter of national palay production volume against the national retail
# price of well-milled rice, one point per quarter, coloured by quarter.
import pandas as pd
# `px` must be plotly.express: the top-level `plotly` package has no
# `scatter`, and rebinding `px` to it would also break later cells.
import plotly.express as px

# Volume produced: keep only the national palay row.
df1 = pd.read_csv("datasets/agricultural-indicators/Volume_Rice and Corn_quarterly.csv", skiprows=2)
df1 = df1[(df1["Ecosystem/Croptype"] == "Palay") & (df1["Geolocation"] == "PHILIPPINES")]


def renamer(s: str):
    """Normalise one column label of the volume dataset.

    "Ecosystem/Croptype" becomes "Commodity"; a label containing "Annual"
    is reduced to its first whitespace-separated word; any other label is
    returned unchanged.
    """
    if s == "Ecosystem/Croptype":
        return "Commodity"
    elif "Annual" in s:
        return s.split()[0]
    else:
        return s


df1 = df1.rename(renamer, axis=1)

# Retail prices: national well-milled rice row. The explicit copy makes the
# column assignments below act on an independent frame instead of a slice.
df2 = pd.read_csv("datasets/prices/precleaned/prices_retail_2012-2023.csv")
df2 = df2[(df2.Commodity == "RICE, WELL-MILLED, 1 KG") & (df2.Geolocation == "PHILIPPINES")].copy()
for year in range(2012, 2024):
    # Columns of this year; assumes the first twelve are the months in
    # calendar order — TODO confirm against the CSV.
    year_cols = df2.filter(regex=str(year), axis=1)
    for q in range(0, 12, 3):
        # Mean of three consecutive columns -> "<year> Quarter <1..4>".
        df2[f"{year} Quarter {q//3+1}"] = year_cols.iloc[:, q:q+3].mean(axis=1)
df2 = df2.filter(regex="Quarter|Commodity", axis=1)

# First column (expected to be "Commodity") plus the quarter columns from
# "2012 Quarter 1" onwards.
df1 = pd.concat([df1.iloc[:, 0], df1.loc[:, "2012 Quarter 1":]], axis=1)
df = pd.concat([df1, df2])

# Transpose so that each quarter becomes one record.
df = df.set_index("Commodity").T
df = df.reset_index(names='Year Quarter')
df.columns = ["Year Quarter", "Volume Produced", "Retail Price"]
# expand=False yields a Series, which is what DataFrame.insert expects.
df.insert(loc=1, column="Quarter", value=df['Year Quarter'].str.extract(r'Quarter (.*)', expand=False))
df.insert(loc=1, column="Year", value=df['Year Quarter'].str.extract(r'(.*) Quarter', expand=False))
df.drop(["Year Quarter"], axis=1, inplace=True)

# NOTE(review): the title says 2012-2018 while the loop above covers
# 2012-2023 — confirm which range is intended.
fig = px.scatter(df, x="Volume Produced", y="Retail Price", color="Quarter",
                  title="Rice: Volume Produced vs Retail Price (2012-2018)<br>National averages")
fig.update_traces(textposition='top center')
fig.show()

In [None]:
# PROVINCIAL RICE: VOLUME VS RETAIL PRICE (QUARTERLY, 2012-2023)
def geolocation_inator(df: pd.DataFrame, loc: str|None):
    if (loc == "Provincial"):
        df = df[df["Geolocation"].str.startswith("....")]
    elif (loc == "Regional"):
        df = df[df["Geolocation"].str.startswith("..")]
    elif (loc == "National"):
        df = df[df["Geolocation"] == "PHILIPPINES"]
    df = df.reset_index(drop=True)
    df["Geolocation"] = df["Geolocation"].str.lstrip(".")
    return df


def renamer(s: str):
    """Map a raw column label to the label used by the plots.

    "Ecosystem/Croptype" is renamed to "Commodity", a label containing
    "Annual" is shortened to its first whitespace-separated word, and
    everything else passes through untouched.
    """
    if s == "Ecosystem/Croptype":
        return "Commodity"
    return s.split()[0] if "Annual" in s else s


# NOTE: The ldots are actually useful, we can use them in creating national, regional, and provincial geolocations
# Purpose: scatter of quarterly volume produced vs quarterly mean retail price,
# one colour per province, also written to provincial_rice.html.

# --- Volume produced (quarterly), provincial rows only ---
df1 = pd.read_csv("datasets/agricultural-indicators/Volume_Rice and Corn_quarterly.csv", skiprows=2)
# Drops every row that has at least one missing value.
df1.dropna(inplace=True)
df1 = geolocation_inator(df1, "Provincial")
# Tag the rows with the measure they carry so both datasets can be stacked.
df1.insert(2, "Type", "Volume")
df1.rename(renamer, axis=1, inplace=True)
df1 = df1[(df1.Commodity == "Palay")]
# Swaps the first two columns; presumably this yields Geolocation, Commodity,
# Type to line up with df2 — verify against the CSV column order.
df1 = pd.concat([df1.iloc[:, [1,0,2]], df1.iloc[:, 3:]], axis=1)
# Bare expression in the middle of a cell: evaluated and discarded.
df1

# --- Retail price, provincial rows only ---
df2 = pd.read_csv("datasets/prices/precleaned/prices_retail_2012-2023.csv")
df2.dropna(inplace=True)
df2 = geolocation_inator(df2, "Provincial")
df2.insert(2, "Type", "Retail Price")
df2 = df2[(df2.Commodity == "RICE, WELL-MILLED, 1 KG")]
# Relabel so both datasets share the same Commodity value.
# NOTE(review): this assigns into a filtered slice and may emit a
# SettingWithCopyWarning on pandas < 3 — consider a .copy() on the line above.
df2.Commodity = "Palay"
# Bare expression in the middle of a cell: evaluated and discarded.
df1

# Collapse the price columns of each year into four "<year> Quarter <n>" columns.
# Assumes the first twelve columns matching the year are the months in calendar
# order — TODO confirm.
for year in range(2012, 2024):
    for q in range(0, 12, 3):
        # print(year, q//3+1, q, q+3)
        df2[f"{year} Quarter {q//3+1}"] = df2.filter(regex=str(year), axis=1).iloc[:, q:q+3].mean(axis=1)
df2 = df2.filter(regex="Geolocation|Type|Quarter|Commodity", axis=1)

# Keep the three label columns plus the quarters from "2012 Quarter 1" onwards.
df1 = pd.concat([df1.iloc[:, 0:3], df1.loc[:, "2012 Quarter 1":]], axis=1)
df = pd.concat([df1, df2])
# Sorting by Type within a province puts "Retail Price" before "Volume";
# the positional pairing further down depends on this order.
df.sort_values(by=["Geolocation", "Type"], inplace=True)
df.reset_index(drop=True, inplace=True)

# REMOVING PROVINCES WITHOUT PAIR
# NOTE(review): `to_drop` and this `i` are never read afterwards.
to_drop = []
i = 0
for province in df["Geolocation"].unique():
    test = df[df["Geolocation"] == province]
    # print(check.iloc[:, 0:1])
    # print(i, province, test.shape)
    # Only provinces with exactly one row are removed; a province with three or
    # more rows is kept and would shift the two-column pairing below.
    if test.shape[0] == 1:
        df.drop(index=[int(test.index[0])], axis = 0, inplace=True)

# # PLOTTING
# After transposing, every column is one (province, type) series and the rows
# are Geolocation, Commodity, Type and the quarters.
df = df.T
df.drop(["Commodity"], axis=0, inplace=True)
# df = df.iloc[:, 0:2]
# df.insert(0, "Province", df.iloc[0,0])
# df.drop(["Geolocation", "Type"], inplace=True)
# df.columns = ["Province", "Volume Produced", "Retail Price"]

# Build a long table by walking the columns two at a time (one province each).
tdf_acc = pd.DataFrame(columns=["Province", "Retail Price", "Volume"])
for i in range(0, len(df.columns), 2):
    tdf = df.iloc[:, i:i+2]
    # Row 0 is expected to be the Geolocation row, so iloc[0, 0] is the province name.
    tdf.insert(0, "Province", tdf.iloc[0,0])
    tdf = tdf.drop(["Geolocation", "Type"], axis=0)
    tdf.columns = ["Province", "Retail Price", "Volume"]
    # NOTE(review): the pair at i == 0 is never appended, so the first province
    # is missing from the plot — confirm whether that is intended.
    if i != 0:
        tdf_acc = pd.concat([tdf, tdf_acc])
    # NOTE(review): hard stop after the pair starting at column 130; later
    # provinces are ignored — confirm why.
    if i == 130:
        break

df = tdf_acc
# Bare expression in the middle of a cell: evaluated and discarded.
df

# NOTE(review): the title says "2012-2018" and "National quarterly averages",
# but the data prepared above is provincial and the loop covers 2012-2023.
fig = px.scatter(df, x="Volume", y="Retail Price", color="Province",
                    title="Rice: Volume Produced vs Retail Price (2012-2018)<br>National quarterly averages")
fig.update_traces(textposition='top center')
fig.update_layout(
    xaxis_title="Volume Produced (tons)", yaxis_title="Retail Price (PHP)"
)
fig.show()
fig.write_html("provincial_rice.html")

# Disabled alternative: one figure per province.
# for province in df["Geolocation"].unique():
#     plot_df = df[df["Geolocation"] == province]
#     if plot_df.shape[0] == 1:
#         continue
#     plot_df = plot_df.T.drop(["Commodity"], axis=0)
#     plot_df.drop(["Geolocation", "Type"], inplace=True)
#     plot_df.columns = ["Volume Produced", "Retail Price"]
#     fig = px.scatter(plot_df, x="Volume Produced", y="Retail Price",
#                       title=f"Rice in {province}: Volume Produced vs Retail Price (2012-2018)<br>National quarterly averages")
#     fig.update_traces(textposition='top center')
#     fig.update_layout(
#         xaxis_title="Volume Produced (tons)", yaxis_title="Retail Price (PHP)"
#     )
#     fig.show()
#     # fig = go.Figure(data = fig1.data + fig2.data)

# Last expression of the cell: the notebook displays the accumulated table.
tdf_acc


In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Create figure with secondary y-axis
# Line chart of the national annual retail price of well-milled rice (left
# axis) and the national annual palay production volume (right axis).
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Volume produced: keep only the national palay row.
df1 = pd.read_csv("datasets/agricultural-indicators/Volume_Rice and Corn_annual.csv", skiprows=2)
df1 = df1[(df1["Ecosystem/Croptype"] == "Palay") & (df1["Geolocation"] == "PHILIPPINES")]


def renamer(s: str):
    """Normalise one column label of the volume dataset.

    "Ecosystem/Croptype" becomes "Commodity"; a label containing "Annual"
    is reduced to its first whitespace-separated word (presumably the year);
    any other label is returned unchanged.
    """
    if s == "Ecosystem/Croptype":
        return "Commodity"
    elif "Annual" in s:
        return s.split()[0]
    else:
        return s


df1 = df1.rename(renamer, axis=1)

# Retail prices: collapse every year's columns into a single "<year>" column.
df2 = pd.read_csv("datasets/prices/precleaned/prices_retail_2012-2023.csv")
for year in range(2012, 2024):
    # filter() already returns only this year's columns, so all of them are
    # averaged. (The previous `.iloc[:, 2:]` skipped the first two of them,
    # which left the first two periods out of the yearly mean.)
    df2[str(year)] = df2.filter(regex=str(year), axis=1).mean(axis=1)
    # Remove the per-period columns ("<year> ..."); the trailing space in the
    # pattern keeps the freshly added "<year>" column.
    df2 = df2.drop(columns=df2.filter(regex=f"{year} ", axis=1).columns)

# First column (expected to be "Commodity" after renaming, which set_index
# below relies on) plus the year columns from 2012 onwards.
df1 = pd.concat([df1.iloc[:, 0], df1.loc[:, "2012":]], axis=1)

# National well-milled rice row, without its first column.
df2 = df2[(df2.Commodity == "RICE, WELL-MILLED, 1 KG") & (df2.Geolocation == "PHILIPPINES")].iloc[:, 1:]

# Stack the two rows, then transpose so that each year becomes one record.
df = pd.concat([df1, df2])
df = df.set_index("Commodity").T
df = df.reset_index(names='Year')
df.columns = ["Year", "Volume Produced", "Retail Price"]

# Add traces
fig.add_trace(
    go.Scatter(x=df["Year"], y=df["Retail Price"], name="Retail Price"),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(x=df["Year"], y=df["Volume Produced"], name="Volume Produced"),
    secondary_y=True,
)

# Add figure title
# NOTE(review): the subtitle mentions "Irrigated Palay" while the filter above
# selects "Palay" — confirm the wording.
fig.update_layout(
    title_text="Rice in the Philippines: Retail Price and Volume of Production (2012-2023)<br>Annual averages for Retail Price of Well-Milled Rice, and Volume of Irrigated Palay"
)

# Set x-axis title
fig.update_xaxes(title_text="Year")

# Set y-axes titles
fig.update_yaxes(title_text="<b>Retail Price</b> (PHP)", secondary_y=False)
fig.update_yaxes(title_text="<b>Volume of Production</b> (tons)", secondary_y=True)

fig.show()

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Create figure with secondary y-axis
# Line chart of the national quarterly retail price of well-milled rice (left
# axis) and the national quarterly palay production volume (right axis).
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Volume produced: keep only the national palay row.
df1 = pd.read_csv("datasets/agricultural-indicators/Volume_Rice and Corn_quarterly.csv", skiprows=2)
df1 = df1[(df1["Ecosystem/Croptype"] == "Palay") & (df1["Geolocation"] == "PHILIPPINES")]


def renamer(s: str):
    """Normalise one column label of the volume dataset.

    "Ecosystem/Croptype" becomes "Commodity"; a label containing "Annual"
    is reduced to its first whitespace-separated word; any other label is
    returned unchanged.
    """
    if s == "Ecosystem/Croptype":
        return "Commodity"
    elif "Annual" in s:
        return s.split()[0]
    else:
        return s


df1 = df1.rename(renamer, axis=1)

# Retail prices: national well-milled rice row. The explicit copy makes the
# column assignments below act on an independent frame instead of a slice
# (avoids SettingWithCopyWarning).
df2 = pd.read_csv("datasets/prices/precleaned/prices_retail_2012-2023.csv")
df2 = df2[(df2.Commodity == "RICE, WELL-MILLED, 1 KG") & (df2.Geolocation == "PHILIPPINES")].copy()
for year in range(2012, 2024):
    # Columns of this year; assumes the first twelve are the months in
    # calendar order — TODO confirm against the CSV.
    year_cols = df2.filter(regex=str(year), axis=1)
    for q in range(0, 12, 3):
        # Mean of three consecutive columns -> "<year> Quarter <1..4>".
        df2[f"{year} Quarter {q//3+1}"] = year_cols.iloc[:, q:q+3].mean(axis=1)
df2 = df2.filter(regex="Quarter|Commodity", axis=1)

# First column (expected to be "Commodity") plus the quarter columns from
# "2012 Quarter 1" onwards.
df1 = pd.concat([df1.iloc[:, 0], df1.loc[:, "2012 Quarter 1":]], axis=1)
df = pd.concat([df1, df2])

# Transpose so that each quarter becomes one record.
df = df.set_index("Commodity").T
df = df.reset_index(names='Year Quarter')
df.columns = ["Year Quarter", "Volume Produced", "Retail Price"]
# expand=False yields a Series, which is what DataFrame.insert expects.
df.insert(loc=1, column="Quarter", value=df['Year Quarter'].str.extract(r'Quarter (.*)', expand=False))
df.insert(loc=1, column="Year", value=df['Year Quarter'].str.extract(r'(.*) Quarter', expand=False))

# Add traces
fig.add_trace(
    go.Scatter(x=df["Year Quarter"], y=df["Retail Price"], name="Retail Price"),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(x=df["Year Quarter"], y=df["Volume Produced"], name="Volume Produced"),
    secondary_y=True,
)

# Add figure title
# NOTE(review): the subtitle mentions "Irrigated Palay" while the filter above
# selects "Palay" — confirm the wording.
fig.update_layout(
    title_text="Rice in the Philippines: Retail Price and Volume of Production (2012-2023)<br>Quarterly averages for Retail Price of Well-Milled Rice, and Volume of Irrigated Palay"
)

# Set x-axis title
# NOTE(review): the x values are "<year> Quarter <n>" labels, not plain years.
fig.update_xaxes(title_text="Year")

# Set y-axes titles
fig.update_yaxes(title_text="<b>Retail Price</b> (PHP)", secondary_y=False)
fig.update_yaxes(title_text="<b>Volume of Production</b> (tons)", secondary_y=True)

fig.show()

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Create figure with secondary y-axis
# Purpose: line chart of quarterly retail price (left axis) and quarterly
# volume produced (right axis) for the province "Pangasinan".
# Relies on geolocation_inator and renamer being defined by an earlier cell.
fig = make_subplots(specs=[[{"secondary_y": True}]])

# NOTE: The ldots are actually useful, we can use them in creating national, regional, and provincial geolocations
# --- Volume produced (quarterly), provincial rows only ---
df1 = pd.read_csv("datasets/agricultural-indicators/Volume_Rice and Corn_quarterly.csv", skiprows=2)
# Drops every row that has at least one missing value.
df1.dropna(inplace=True)
df1 = geolocation_inator(df1, "Provincial")
# Tag the rows with the measure they carry so both datasets can be stacked.
df1.insert(2, "Type", "Volume")
df1.rename(renamer, axis=1, inplace=True)
df1 = df1[(df1.Commodity == "Palay")]
# Swaps the first two columns; presumably this yields Geolocation, Commodity,
# Type to line up with df2 — verify against the CSV column order.
df1 = pd.concat([df1.iloc[:, [1,0,2]], df1.iloc[:, 3:]], axis=1)
# Bare expression in the middle of a cell: evaluated and discarded.
df1

# --- Retail price, provincial rows only ---
df2 = pd.read_csv("datasets/prices/precleaned/prices_retail_2012-2023.csv")
df2.dropna(inplace=True)
df2 = geolocation_inator(df2, "Provincial")
df2.insert(2, "Type", "Retail Price")
df2 = df2[(df2.Commodity == "RICE, WELL-MILLED, 1 KG")]
# Relabel so both datasets share the same Commodity value.
# NOTE(review): this assigns into a filtered slice and may emit a
# SettingWithCopyWarning on pandas < 3 — consider a .copy() on the line above.
df2.Commodity = "Palay"
# Bare expression in the middle of a cell: evaluated and discarded.
df1

# Collapse the price columns of each year into four "<year> Quarter <n>" columns.
# Assumes the first twelve columns matching the year are the months in calendar
# order — TODO confirm.
for year in range(2012, 2024):
    for q in range(0, 12, 3):
        # print(year, q//3+1, q, q+3)
        df2[f"{year} Quarter {q//3+1}"] = df2.filter(regex=str(year), axis=1).iloc[:, q:q+3].mean(axis=1)
df2 = df2.filter(regex="Geolocation|Type|Quarter|Commodity", axis=1)

# Keep the three label columns plus the quarters from "2012 Quarter 1" onwards.
df1 = pd.concat([df1.iloc[:, 0:3], df1.loc[:, "2012 Quarter 1":]], axis=1)
df = pd.concat([df1, df2])
# Sorting by Type within a province puts "Retail Price" before "Volume";
# the positional pairing further down depends on this order.
df.sort_values(by=["Geolocation", "Type"], inplace=True)
df.reset_index(drop=True, inplace=True)

# Bare expression in the middle of a cell: evaluated and discarded.
df

# REMOVING PROVINCES WITHOUT PAIR
# NOTE(review): `to_drop` and this `i` are never read afterwards.
to_drop = []
i = 0
for province in df["Geolocation"].unique():
    test = df[df["Geolocation"] == province]
    # print(check.iloc[:, 0:1])
    # print(i, province, test.shape)
    # Only provinces with exactly one row are removed; a province with three or
    # more rows is kept and would shift the two-column pairing below.
    if test.shape[0] == 1:
        df.drop(index=[int(test.index[0])], axis = 0, inplace=True)

# # PLOTTING
# After transposing, every column is one (province, type) series and the rows
# are Geolocation, Commodity, Type and the quarters.
df = df.T
df.drop(["Commodity"], axis=0, inplace=True)
# df = df.iloc[:, 0:2]
# df.insert(0, "Province", df.iloc[0,0])
# df.drop(["Geolocation", "Type"], inplace=True)
# df.columns = ["Province", "Volume Produced", "Retail Price"]

# Build a long table by walking the columns two at a time (one province each).
tdf_acc = pd.DataFrame(columns=["Province", "Retail Price", "Volume"])
for i in range(0, len(df.columns), 2):
    tdf = df.iloc[:, i:i+2]
    # Row 0 is expected to be the Geolocation row, so iloc[0, 0] is the province name.
    tdf.insert(0, "Province", tdf.iloc[0,0])
    tdf = tdf.drop(["Geolocation", "Type"], axis=0)
    tdf.columns = ["Province", "Retail Price", "Volume"]
    # NOTE(review): the pair at i == 0 is never appended, so the first province
    # is missing from the result — confirm whether that is intended.
    if i != 0:
        tdf_acc = pd.concat([tdf, tdf_acc])
    # NOTE(review): hard stop after the pair starting at column 130; a province
    # located after that position would yield an empty selection below.
    if i == 130:
        break

df = tdf_acc
# Keep a single province; the index still holds the "<year> Quarter <n>" labels.
df = df[df["Province"] == "Pangasinan"]

# Add traces
fig.add_trace(
    go.Scatter(x=df.index, y=df["Retail Price"], name="Retail Price"),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(x=df.index, y=df["Volume"], name="Volume Produced"),
    secondary_y=True,
)

# Add figure title
# NOTE(review): the subtitle mentions "Irrigated Palay" while the filter above
# selects "Palay" — confirm the wording.
fig.update_layout(
    title_text="Rice in Pangasinan: Retail Price and Volume of Production (2012-2023)<br>Quarterly averages for Retail Price of Well-Milled Rice, and Volume of Irrigated Palay"
)

# Set x-axis title
# NOTE(review): the x values are "<year> Quarter <n>" labels, not plain years.
fig.update_xaxes(title_text="Year")

# Set y-axes titles
fig.update_yaxes(title_text="<b>Retail Price</b> (PHP)", secondary_y=False)
fig.update_yaxes(title_text="<b>Volume of Production</b> (tons)", secondary_y=True)

fig.show()