In [None]:
import pandas as pd

# Read the credit dataset from its CSV file into `df` and preview the first rows
csv_path = '/content/credit.csv'
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,Farming_Turnover,Crop_Yields,Market_Demand,Land_Ownership,Land_Size,Weather_Data,Government_Subsidy,Expenses,Livestock_Inventory,Education,Debts_Liabilities,Community_Feedback,Creditworthiness
0,53056,3000,High,Leased,10,Normal,Yes,16698,Cattle,High School,10000,Low,Good
1,51763,2126,Medium,Owned,12,Variable,No,16132,Cattle,College Degree,0,High,Very Good
2,40000,2213,High,Owned,10,Variable,Yes,13545,Cattle,College Degree,2000,High,Good
3,73980,3000,Medium,Owned,12,Variable,No,16950,Cattle,College Degree,0,High,Very Good
4,51503,1737,Low,Owned,8,Normal,No,15840,Cattle,,0,High,Very Good


In [None]:
# Integer encodings for the categorical columns, keyed by column name.
# Each inner dict maps a raw label from the CSV to the code used for modelling.
mapping = {
    'Market_Demand': {'High': 2, 'Medium': 1, 'Low': 0},
    'Land_Ownership': {'Leased': 1, 'Owned': 2},
    'Weather_Data': {'Normal': 2, 'Unpredictable': 0, 'Variable': 1},
    'Government_Subsidy': {'Yes': 1, 'No': 0},
    'Livestock_Inventory': {'Sheep': 2, 'Pigs': 1, 'Chicken': 0, 'Cattle': 3},
    'Education': {'High School': 2, 'College Degree': 3, 'None': 1},
    'Community_Feedback': {'Low': 0, 'Medium': 1, 'High': 2},
    'Creditworthiness': {'Excellent': 5, 'Very Good': 4, 'Good': 3, 'Average': 2, 'Poor': 1}
}

# Recent pandas versions treat the literal text "None" as a missing value when
# reading a CSV, so the 'None' education level can arrive as NaN and would then
# never be encoded. Restore the label first; this is a no-op when the column
# has no missing entries.
# NOTE(review): this assumes a missing Education entry means the 'None' level;
# confirm against the raw file.
df = df.fillna({'Education': 'None'})

# Apply mapping to the dataframe (returns a new frame rather than mutating)
df = df.replace(mapping)

# Print the head of the updated dataset
print(df.head())


   Farming_Turnover  Crop_Yields  Market_Demand  Land_Ownership  Land_Size  \
0             53056         3000              2               1         10   
1             51763         2126              1               2         12   
2             40000         2213              2               2         10   
3             73980         3000              1               2         12   
4             51503         1737              0               2          8   

   Weather_Data  Government_Subsidy  Expenses  Livestock_Inventory  Education  \
0             2                   1     16698                    3          2   
1             1                   0     16132                    3          3   
2             1                   1     13545                    3          3   
3             1                   0     16950                    3          3   
4             2                   0     15840                    3          1   

   Debts_Liabilities  Community_Feedback  Cr

In [None]:
def _show(title, result):
    """Print a section heading followed by an already-computed result."""
    print(title)
    print(result)


def _mean_credit_by(keys):
    """Return the mean Creditworthiness code for each group defined by `keys`."""
    return df.groupby(keys)['Creditworthiness'].mean()


# --- Summary statistics for every column ---
print("Descriptive Statistics:")
summary_stats = df.describe()
print(summary_stats)

# --- Pairwise correlations between columns ---
correlation_matrix = df.corr()
_show("Correlation Matrix:", correlation_matrix)

# --- Frequency of each encoded category ---
categorical_columns = [
    'Market_Demand',
    'Land_Ownership',
    'Weather_Data',
    'Government_Subsidy',
    'Livestock_Inventory',
    'Education',
    'Community_Feedback',
    'Creditworthiness',
]
for column in categorical_columns:
    print(f"\nCategorical Analysis for {column}:")
    category_counts = df[column].value_counts()
    print(category_counts)

# --- Expenses side by side with turnover ---
expense_turnover_analysis = df.loc[:, ['Expenses', 'Farming_Turnover']]
_show("Expense vs. Turnover Analysis:", expense_turnover_analysis)

# --- Distribution of the target classes ---
print("Creditworthiness Distribution:")
creditworthiness_counts = df['Creditworthiness'].value_counts()
print(creditworthiness_counts)

# --- Mean creditworthiness per feature level ---
# Each result is computed first and then reported under its own heading.
education_impact = _mean_credit_by('Education')
_show("Educational Impact:", education_impact)

community_feedback_analysis = _mean_credit_by('Community_Feedback')
_show("Community Feedback Analysis:", community_feedback_analysis)

land_impact = _mean_credit_by(['Land_Ownership', 'Land_Size'])
_show("Land Ownership and Size Impact:", land_impact)

livestock_impact = _mean_credit_by('Livestock_Inventory')
_show("Livestock and Inventory Impact:", livestock_impact)

market_demand_impact = _mean_credit_by('Market_Demand')
_show("Market Demand and Creditworthiness:", market_demand_impact)


Descriptive Statistics:
       Farming_Turnover  Crop_Yields  Market_Demand  Land_Ownership  \
count       5000.000000   5000.00000    5000.000000     5000.000000   
mean       56300.673000   2336.07280       1.151000        1.630600   
std        11199.540434    484.14733       0.761259        0.482691   
min        40000.000000   1500.00000       0.000000        1.000000   
25%        47940.750000   1976.00000       1.000000        1.000000   
50%        55262.000000   2326.00000       1.000000        2.000000   
75%        62175.500000   2821.25000       2.000000        2.000000   
max        75000.000000   3000.00000       2.000000        2.000000   

         Land_Size  Weather_Data  Government_Subsidy      Expenses  \
count  5000.000000   5000.000000         5000.000000   5000.000000   
mean      8.467600      1.152800            0.594400  15626.112600   
std       2.449602      0.747237            0.491057   2340.474154   
min       5.000000      0.000000            0.000000  12

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Features are every column except the target; the target is the encoded
# 'Creditworthiness' class.
X = df.drop('Creditworthiness', axis=1)
y = df['Creditworthiness']

# Hold out 20% of the rows for evaluation (fixed seed so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardise the features before the logistic regression. On the raw columns
# (turnover in the tens of thousands next to 0-3 category codes) the solver
# stopped at its iteration limit without converging, so the features are put on
# a common scale and the iteration budget is raised.
# `model` is now a Pipeline: `model.predict(...)` scales its input automatically,
# and the fitted LogisticRegression itself is available as `model[-1]`.
model = make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000))

# Train the model (the scaler is fitted on the training rows only)
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

# Print the results
print(f"Accuracy: {accuracy}")
print("Confusion Matrix:")
print(conf_matrix)
print("Classification Report:")
print(class_report)


Accuracy: 0.381
Confusion Matrix:
[[118   4   4  38  23]
 [ 22  24   0 123  47]
 [ 35  14   4 109  28]
 [ 15  21   6 118  56]
 [ 24   8  10  32 117]]
Classification Report:
              precision    recall  f1-score   support

           1       0.55      0.63      0.59       187
           2       0.34      0.11      0.17       216
           3       0.17      0.02      0.04       190
           4       0.28      0.55      0.37       216
           5       0.43      0.61      0.51       191

    accuracy                           0.38      1000
   macro avg       0.35      0.38      0.33      1000
weighted avg       0.35      0.38      0.33      1000



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [None]:
# Score one hand-entered applicant with the trained `model`.
# Relies on `mapping`, `model` and `X_train` from the earlier cells.

# New input data (replace this with actual values)
new_data = {
    'Farming_Turnover': 50000,
    'Crop_Yields': 1500,
    'Land_Size': 10,
    'Expenses': 20000,
    'Debts_Liabilities': 5000,
    'Market_Demand': 'High',
    'Land_Ownership': 'Owned',
    'Weather_Data': 'Normal',
    'Government_Subsidy': 'Yes',
    'Livestock_Inventory': 'Sheep',
    'Education': 'College Degree',
    'Community_Feedback': 'Medium'
}

# Convert the input to a one-row DataFrame and encode its categorical labels
# with the same mapping that was applied to the training data
new_df = pd.DataFrame([new_data]).replace(mapping)

# Ensure that the columns are in the same order as during training
new_df = new_df[X_train.columns]

# Every feature must be numeric at this point. A missing value, or a label that
# is not present in `mapping`, becomes NaN here and is reported explicitly
# instead of being replaced by a sentinel the model never saw during training.
new_df = new_df.apply(pd.to_numeric, errors='coerce')
invalid_columns = new_df.columns[new_df.isna().any()].tolist()
if invalid_columns:
    raise ValueError(
        f"Missing or unrecognised values in new_data for: {invalid_columns}"
    )

# Make prediction
prediction = model.predict(new_df)

# Print the prediction
print("Predicted Creditworthiness:", prediction)

# Map numeric predictions back to categories by inverting the encoding used for
# the target column, so the two can never drift apart
creditworthiness_mapping = {
    code: label for label, code in mapping['Creditworthiness'].items()
}

# Map the numeric prediction to the corresponding category
predicted_category = creditworthiness_mapping.get(prediction[0], 'Unknown')

# Print the interpreted prediction
print("Interpreted Creditworthiness:", predicted_category)


Predicted Creditworthiness: [2]
Interpreted Creditworthiness: Average
