In [1]:
import pandas as pd 
import numpy as np
import joblib
import streamlit as st
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn import metrics

FILE_NAME = "Farmers_Data.csv"

df = pd.read_csv(FILE_NAME, delimiter=",")

In [2]:
df['Gender'] = df['Gender'].map({'Female': 1, 'Male': 0})
df['Credit Eligibility'] = df['Credit Eligibility'].map({'Yes': 1, 'No': 0})
df.head()

Unnamed: 0,Age (years),Gender,Education (years),Farming Experience (years),Distance to Loan Source (km),Loan-to-Asset Ratio,Operating Expenditure to Income Ratio,Outstanding Loan-to-Asset Ratio,Farm Size (ha),Credit Eligibility,Z-Score
0,53,0,10,1,3.1,0.41,0.64,0.53,8.8,1,0.770462
1,39,0,9,18,14.7,0.7,0.56,0.58,10.4,0,-1.404712
2,32,0,13,32,8.9,0.57,0.6,0.12,10.7,1,0.413601
3,45,0,12,11,11.7,0.57,0.62,0.47,14.1,0,-0.370764
4,43,0,7,21,12.9,0.54,0.56,0.55,10.4,1,-0.727027


In [3]:
X=df[['Age (years)','Gender','Education (years)','Farming Experience (years)','Distance to Loan Source (km)','Loan-to-Asset Ratio','Operating Expenditure to Income Ratio','Outstanding Loan-to-Asset Ratio','Farm Size (ha)']]
y=df['Credit Eligibility']

In [4]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=.2,random_state=0)

In [5]:
clf=LDA()
clf.fit(X_train,y_train)
clf.score(X_train,y_train) 

0.61875

In [6]:
 y_pred=clf.predict(X_test)

In [7]:
y_pred

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0,
       0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0], dtype=int64)

In [8]:
 print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.54      0.78      0.64        18
           1       0.71      0.45      0.56        22

    accuracy                           0.60        40
   macro avg       0.63      0.62      0.60        40
weighted avg       0.64      0.60      0.59        40



In [9]:
coefficients = clf.coef_
feature_importance = pd.DataFrame({'Feature': X.columns, 'Coefficient': coefficients[0]})
print(feature_importance)

                                 Feature  Coefficient
0                            Age (years)     0.003642
1                                 Gender     0.160612
2                      Education (years)     0.062973
3             Farming Experience (years)     0.007006
4           Distance to Loan Source (km)    -0.057132
5                    Loan-to-Asset Ratio    -2.099648
6  Operating Expenditure to Income Ratio    -1.669499
7        Outstanding Loan-to-Asset Ratio    -0.189045
8                         Farm Size (ha)     0.003658


In [11]:
# Calculate Z-scores
z_scores = clf.transform(X)

# Create a DataFrame for visualization
z_scores_df = pd.DataFrame({'Applicant': range(1, len(z_scores) + 1), 'Z-Score': z_scores.flatten()})
print(z_scores_df)

     Applicant   Z-Score
0            1  0.770462
1            2 -1.404712
2            3  0.413601
3            4 -0.370764
4            5 -0.727027
..         ...       ...
195        196 -1.156741
196        197  1.633243
197        198 -2.414914
198        199  2.027108
199        200  0.196382

[200 rows x 2 columns]


In [12]:
z_scores_df.head()

Unnamed: 0,Applicant,Z-Score
0,1,0.770462
1,2,-1.404712
2,3,0.413601
3,4,-0.370764
4,5,-0.727027


In [13]:
# Convert Z-scores to a list
z_scores_list = z_scores.flatten().tolist()

# Print the list
print()





In [14]:
# Create a DataFrame for Z-scores
z_scores_df = pd.DataFrame({
    'Applicant': range(1, len(z_scores) + 1),
    'Z-Score': z_scores.flatten()
})

# Save the Z-scores to an Excel file
z_scores_df.to_excel("z_scores_output.xlsx", index=False)

print("Z-scores saved to 'z_scores_output.xlsx'")


Z-scores saved to 'z_scores_output.xlsx'


In [15]:
# Save the Z-scores to a CSV file
z_scores_df.to_csv("z_scores_output.csv", index=False)

print("Z-scores saved to 'z_scores_output.csv'")


Z-scores saved to 'z_scores_output.csv'


In [35]:
# Save the model
joblib.dump(clf, 'discriminant_analysis_model.pkl')


['discriminant_analysis_model.pkl']

In [17]:
# Load the trained model
#model = joblib.load('discriminant_analysis_model.pkl')
model = clf

# Streamlit app title
st.title("Discriminant Analysis Classifier")

# User input form
st.header("Input Your Data")

# 1. Age (years)
age = st.number_input("Age (years)", min_value=0, max_value=120, step=1)

# 2. Gender
gender = st.radio("Gender", options=["Male", "Female"])
gender_value = 0 if gender == "Male" else 1  # Convert to numerical value

# 3. Education (years)
education = st.number_input("Education (years)", min_value=0, max_value=16, step=1)

# Ensure education is less than age
if education >= age:
    st.warning("Education value must be less than Age. Please adjust your input.")

# 4. Farming Experience (years)
farming_experience = st.number_input("Farming Experience (years)", min_value=0, step=1)

# Ensure farming experience is less than age by at least 15 years
if farming_experience >= age - 15:
    st.warning("Farming Experience must be at least 15 years less than Age. Please adjust your input.")

# 5. Distance to Loan Source (km)
distance_to_loan_source = st.number_input("Distance to Loan Source (km)", min_value=0.0, step=0.1)

# 6. Loan to Asset Ratio
st.subheader("Loan to Asset Ratio")
loan_value = st.number_input("Loan Value", min_value=0.0, step=0.1)
asset_value = st.number_input("Asset Value", min_value=0.1, step=0.1)  # Ensure non-zero
loan_to_asset_ratio = loan_value / asset_value if asset_value > 0 else 0.0

# 7. Operating Expenditure to Income Ratio
st.subheader("Operating Expenditure to Income Ratio")
operating_expenditure = st.number_input("Operating Expenditure", min_value=0.0, step=0.1)
income_value = st.number_input("Income", min_value=0.1, step=0.1)  # Ensure non-zero
opex_to_income_ratio = operating_expenditure / income_value if income_value > 0 else 0.0

# 8. Outstanding Loan to Asset Ratio
st.subheader("Outstanding Loan to Asset Ratio")
outstanding_loan = st.number_input("Outstanding Loan Value", min_value=0.0, step=0.1)
outstanding_loan_to_asset_ratio = outstanding_loan / asset_value if asset_value > 0 else 0.0

# 9. Farm Size
farm_size = st.number_input("Farm Size (acres)", min_value=0.0, step=0.1)

# Validation to ensure all required fields are valid
if st.button("Classify"):
    if education >= age:
        st.error("Please ensure Education value is less than Age.")
    elif farming_experience >= age - 15:
        st.error("Please ensure Farming Experience is at least 15 years less than Age.")
    else:
        # Prepare input data for prediction
        input_data = np.array([[
            age, gender_value, education, farming_experience,
            distance_to_loan_source, loan_to_asset_ratio,
            opex_to_income_ratio, outstanding_loan_to_asset_ratio,
            farm_size
        ]])

        # Make prediction
        prediction = model.predict(input_data)
        prediction_proba = model.predict_proba(input_data)

        # Display results
        st.subheader("Prediction Result")
        st.write(f"The predicted class is: {prediction[0]}")

        st.subheader("Prediction Probabilities")
        st.write(prediction_proba)


2025-01-15 10:00:20.662 
  command:

    streamlit run C:\Users\CHUKWUEMEKA DIKE\anaconda3\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]
