## 1.Creation of In-app Synthetic Data

In [1]:
import pandas as pd
import numpy as np

np.random.seed(42)


n_farmers = 100

data = {
    "Farmer_ID": np.arange(1, n_farmers + 1),
    "Crop_Management": np.random.randint(50, 100, n_farmers),  # Efficiency score (0-100)
    "Avg_Income": np.random.randint(10000, 3000000, n_farmers),  # Annual income in KSH
    "Yield_Efficiency": np.random.randint(500, 2000, n_farmers),  # Yield in kg/acre
    "Record_Keeping": np.random.randint(40, 100, n_farmers),  # Record-keeping score (0-100)
    "Farm_Size": np.random.randint(1, 20, n_farmers),  # Total farm size in acres
    "Active_Land": np.random.randint(1, 20, n_farmers),  # Active land in acres
    "Crop_Type_Score": np.random.choice([65, 70, 75, 78, 80], n_farmers),  # Composite score
    "Irrigation_Score": np.random.randint(30, 100, n_farmers),  # Irrigation score (0-100)
    "Farm_Inputs": np.random.randint(50, 100, n_farmers),  # Farm inputs score (0-100)
    "Weather_Risk": np.random.randint(10, 80, n_farmers),  # Weather risk score (0-100)
}

# Create DataFrame
df = pd.DataFrame(data)

# Ensure Active Land <= Farm Size
df["Active_Land"] = np.where(df["Active_Land"] > df["Farm_Size"], df["Farm_Size"], df["Active_Land"])

# Normalize Active Land to Farm Size ratio
df["Active_Land_Ratio"] = df["Active_Land"] / df["Farm_Size"]

# Save to CSV
df.to_csv("synthetic_farmers_data.csv", index=False)

df.head()

Unnamed: 0,Farmer_ID,Crop_Management,Avg_Income,Yield_Efficiency,Record_Keeping,Farm_Size,Active_Land,Crop_Type_Score,Irrigation_Score,Farm_Inputs,Weather_Risk,Active_Land_Ratio
0,1,88,2371864,1636,57,1,1,80,47,76,33,1.0
1,2,78,1514384,941,41,9,5,65,78,58,15,0.555556
2,3,64,2043643,1063,93,11,11,78,40,75,75,1.0
3,4,92,2240919,1791,74,16,3,65,55,96,13,0.1875
4,5,57,2755815,1909,55,7,1,65,92,71,15,0.142857


## Credit Scoring 

In [2]:
# Defining weights
weights = {
    "Crop_Management": 0.3,
    "Income": 0.25,
    "Yield_Efficiency": 0.25,
    "Record_Keeping": 0.2,
    "Farm_Size": 0.4,
    "Crop_Type_Score": 0.3,
    "Irrigation_Score": 0.3,
    "Weather_Risk": 0.2,
    "Farm_Inputs": 0.1,
}

# Normalizing data
df["Income_Normalized"] = (df["Avg_Income"] / df["Avg_Income"].max()) * 100
df["Yield_Efficiency_Normalized"] = (df["Yield_Efficiency"] / df["Yield_Efficiency"].max()) * 100

# Calculating Farmer Experience Score
df["Experience_Score"] = (
    df["Crop_Management"] * weights["Crop_Management"] +
    df["Income_Normalized"] * weights["Income"] +
    df["Yield_Efficiency_Normalized"] * weights["Yield_Efficiency"] +
    df["Record_Keeping"] * weights["Record_Keeping"]
)

# Calculating Farm Details Score
df["Farm_Details_Score"] = (
    df["Active_Land_Ratio"] * 100 * weights["Farm_Size"] +
    df["Crop_Type_Score"] * weights["Crop_Type_Score"] +
    df["Irrigation_Score"] * weights["Irrigation_Score"]
)

# Calculating Weather Risk Contribution
df["Weather_Risk_Contribution"] = (100 - df["Weather_Risk"]) * weights["Weather_Risk"]

# Calculating Farm Inputs Contribution
df["Farm_Inputs_Contribution"] = df["Farm_Inputs"] * weights["Farm_Inputs"]

# Calculating Total Credit Score (0-100%)
df["Credit_Score_Percentage"] = (
    df["Experience_Score"] * 0.4 +
    df["Farm_Details_Score"] * 0.3 +
    df["Weather_Risk_Contribution"] * 0.2 +
    df["Farm_Inputs_Contribution"] * 0.1
)

# Adding Risk Tier based on percentages
df["Risk_Tier"] = pd.cut(
    df["Credit_Score_Percentage"],
    bins=[0, 50, 70, 100],
    labels=["High Risk (C)", "Medium Risk (B)", "Low Risk (A)"]
)

# Save results
df.to_csv("farmers_credit_scores_percentage.csv", index=False)
df[["Farmer_ID", "Credit_Score_Percentage", "Risk_Tier"]].head(100).sort_values(by = "Credit_Score_Percentage",ascending = False)

Unnamed: 0,Farmer_ID,Credit_Score_Percentage,Risk_Tier
88,89,63.811484,Medium Risk (B)
78,79,63.411978,Medium Risk (B)
92,93,62.644328,Medium Risk (B)
83,84,61.746989,Medium Risk (B)
56,57,59.228737,Medium Risk (B)
...,...,...,...
65,66,40.661302,High Risk (C)
13,14,40.579259,High Risk (C)
33,34,39.806505,High Risk (C)
64,65,39.346143,High Risk (C)
