In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, KFold, cross_val_score, GridSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
import numpy as np

# Step 1: Read the pincode directory from disk
file_path = '/content/pincodes_all.csv'  # Point this at the CSV if it lives elsewhere
data = pd.read_csv(file_path, encoding='latin-1')  # File is read as latin-1

# Show which columns the file provides
print(data.columns)

# Step 2: Data Preprocessing
# Forward-fill gaps: every missing cell takes the last valid value above it
data = data.ffill()

# Integer-encode the categorical feature columns. Each fitted encoder is kept
# in `label_encoders` so the same mapping can be reused at prediction time.
feature_columns = ['Region Name', 'District']
label_encoders = {}
for feature_name in feature_columns:
    encoder = LabelEncoder()
    data[feature_name] = encoder.fit_transform(data[feature_name])
    label_encoders[feature_name] = encoder

# Features (X) are the two encoded columns; the target (y) is 'Pincode'
X = data[feature_columns]
y = data['Pincode']

# Step 3: Hold out 20% of the rows as a test set (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Step 4: Configure the baseline Random Forest that is scored with K-Fold CV
rf_model = RandomForestClassifier(
    random_state=42,
    n_estimators=100,      # trees in the forest
    max_depth=10,          # depth cap for every tree
    min_samples_split=10,  # smallest node size that may still be split
    min_samples_leaf=4,    # smallest allowed leaf size
)

# Step 5: Score the baseline with shuffled 5-fold cross-validation on the training split
kf = KFold(n_splits=5, shuffle=True, random_state=42)
cv_scores = cross_val_score(
    rf_model,
    X_train,
    y_train,
    scoring='accuracy',
    cv=kf,
)
print(f"Cross-validated accuracy scores: {cv_scores}")
print(f"Mean cross-validation accuracy: {cv_scores.mean()}")

# Step 6: Hyperparameter Tuning Using GridSearchCV
# The grid below has 2 * 3 * 3 * 3 = 54 parameter combinations, and each one is
# fitted once per fold of `kf`.
param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [10, 20, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
}

# n_jobs=1 keeps the whole search inside the notebook process. With n_jobs=-1
# this cell ended in a TerminatedWorkerError (workers killed with SIGKILL),
# which joblib reports for a segfault or for excessive memory use.
# NOTE(review): running serially lowers peak memory but may still not be enough
# if a single forest is too large for the machine -- in that case shrink the
# grid (e.g. drop max_depth=None / n_estimators=200) or reconsider the target.
grid_search = GridSearchCV(estimator=rf_model, param_grid=param_grid,
                           cv=kf, n_jobs=1, scoring='accuracy')

grid_search.fit(X_train, y_train)

# Best hyperparameters and best score
print(f"Best hyperparameters: {grid_search.best_params_}")
print(f"Best cross-validation accuracy: {grid_search.best_score_}")

# Step 7: Take the tuned model from the grid search.
# GridSearchCV refits the best parameter combination on everything passed to
# fit() (refit=True is its default), so best_estimator_ is already trained on
# the full training set; fitting it a second time would only repeat that work.
best_rf_model = grid_search.best_estimator_

# Step 8: Evaluate the model on the test set
y_pred = best_rf_model.predict(X_test)

# Calculate accuracy
test_accuracy = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {test_accuracy}")


# Print classification report and confusion matrix
print("Classification Report:")
# zero_division=0 reports 0 for labels that are never predicted without
# emitting an undefined-metric warning for each of them.
print(classification_report(y_test, y_pred, zero_division=0))

print("Confusion Matrix:")
# NOTE(review): the matrix has one row and one column per distinct label in
# y_test/y_pred; with many distinct pincodes it can get very large -- confirm
# it fits in memory or restrict it to a subset of labels.
print(confusion_matrix(y_test, y_pred))

# Step 9: Feature Importance Analysis (optional)
# Sort the feature positions from most to least important
importances = best_rf_model.feature_importances_
indices = np.argsort(importances)[::-1]

# Report every feature with its 1-based rank and importance value
print("Feature ranking:")
for rank, feature_position in enumerate(indices, start=1):
    feature_label = X_train.columns[feature_position]
    feature_score = importances[feature_position]
    print(f"{rank}. Feature {feature_label} ({feature_score})")

# Step 10: Predict Pincode for New Entries
def predict_pincode(region_name, district):
    """Predict a pincode for one region/district pair with the tuned model.

    Args:
        region_name: Region label; it is encoded with the 'Region Name'
            encoder stored in ``label_encoders``.
        district: District label; it is encoded with the 'District' encoder
            stored in ``label_encoders``.

    Returns:
        The single prediction returned by ``best_rf_model`` for that pair.

    Raises:
        ValueError: Raised by ``LabelEncoder.transform`` when a label was not
            seen while the encoder was fitted.
    """
    # Encode the new inputs using the same label encoders used during training
    encoded_region = label_encoders['Region Name'].transform([region_name])[0]
    encoded_district = label_encoders['District'].transform([district])[0]

    # The model was fitted on a DataFrame with these two named columns, so the
    # query row is given the same column names and order. A bare nested list
    # makes scikit-learn warn that the input has no valid feature names.
    features = pd.DataFrame(
        [[encoded_region, encoded_district]],
        columns=['Region Name', 'District'],
    )

    # Predict the pincode
    predicted_pincode = best_rf_model.predict(features)

    return predicted_pincode[0]

# Example prediction: both values must be labels that occur in the dataset
region_name_input = "Kurnool Region"
district_input = "ANANTHAPUR"
predicted_pincode = predict_pincode(
    region_name=region_name_input,
    district=district_input,
)
print(f"Predicted Pincode: {predicted_pincode}")


Index(['Circle Name', 'Region Name', 'Division Name', 'Office Name', 'Pincode',
       'OfficeType', 'Delivery', 'District', 'StateName'],
      dtype='object')
Cross-validated accuracy scores: [0.0500482  0.0434608  0.04655366 0.04157294 0.04350271]
Mean cross-validation accuracy: 0.04502766310018789


TerminatedWorkerError: A worker process managed by the executor was unexpectedly terminated. This could be caused by a segmentation fault while calling the function or by an excessive memory usage causing the Operating System to kill the worker.

The exit codes of the workers are {SIGKILL(-9)}

In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, KFold, cross_val_score, GridSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
import numpy as np

# Step 1: Read the pincode directory (the file is read as latin-1)
file_path = '/content/pincodes_all.csv'  # Change this if the CSV lives elsewhere
data = pd.read_csv(file_path, encoding='latin-1')

# Keep only the rows typed "HO" (Head Office), which might correspond to "GPO"
gpo_offices = data.loc[data['OfficeType'].eq('HO')]

# Show the identifying columns of the filtered rows as the cell output
gpo_offices[['Office Name', 'Pincode', 'OfficeType']]


Unnamed: 0,Office Name,Pincode,OfficeType
9,Anantapur H.O,515001,HO
140,Guntakal H.O,515801,HO
534,Chittoor H.O,517001,HO
660,Madanapalle H.O,517325,HO
995,Cuddapah H.O,516001,HO
...,...,...,...
154104,Salkia HO,711106,HO
154456,Jhargram HO,721507,HO
154609,Midnapore HO,721101,HO
155145,Purulia HO,723101,HO


In [None]:
# Import necessary libraries
import pandas as pd
from google.colab import files

# Step 1: Load the CSV data
file_path = '/content/pincodes_all.csv'  # Update this to the correct file path if needed
data = pd.read_csv(file_path, encoding='latin-1')  # File is read as latin-1

# Step 2: Filter the dataset for "Head Office" (HO) office types, which might
# correspond to "GPO". The OfficeType value to match is 'HO'; filtering on the
# literal 'GPO' matched no rows and produced an empty export.
gpo_offices = data[data['OfficeType'] == 'HO']

# Step 3: Display the relevant columns for GPO-like entries (just for visualization)
print(gpo_offices[['Office Name', 'Pincode', 'OfficeType']])

output_file_path = '/content/gpo_offices.csv'  # File path in the current environment
if gpo_offices.empty:
    # Nothing matched: do not write or download an empty file
    print("No 'HO' offices found; nothing was saved or downloaded.")
else:
    # Step 4: Save the filtered data to a CSV file
    gpo_offices.to_csv(output_file_path, index=False)

    # Step 5: Download the saved file
    files.download(output_file_path)


Empty DataFrame
Columns: [Office Name, Pincode, OfficeType]
Index: []


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Import necessary libraries
import pandas as pd
from google.colab import files

# Step 1: Read the pincode directory (the file is read as latin-1)
file_path = '/content/pincodes_all.csv'  # Change this if the CSV lives elsewhere
data = pd.read_csv(file_path, encoding='latin-1')

# Step 2: Collect the values to split on
regions = data['Region Name'].unique()  # every distinct region, in order of appearance
office_types = ['HO', 'SO', 'BO']  # office types exported per region

# Step 3: Write and download one CSV per (region, office type) pair that has rows
for region in regions:
    # Rows of the current region; computed once and reused for each office type
    in_region = data['Region Name'] == region

    for office_type in office_types:
        of_type = data['OfficeType'] == office_type
        regional_offices = data[in_region & of_type]

        # Only pairs with at least one row produce a file
        if not regional_offices.empty:
            # File name comes from the region and office type, spaces turned into underscores
            file_name = f"{region}_{office_type}_offices.csv".replace(" ", "_")

            # Save the subset next to the source data
            output_file_path = f'/content/{file_name}'
            regional_offices.to_csv(output_file_path, index=False)

            # Hand the file to the browser
            files.download(output_file_path)

            # Confirm which file was processed
            print(f"Downloaded {file_name}")



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Kurnool_Region_HO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Kurnool_Region_SO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Kurnool_Region_BO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Vijayawada_Region_HO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Vijayawada_Region_SO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Vijayawada_Region_BO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Visakhapatnam_Region_HO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Visakhapatnam_Region_SO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Visakhapatnam_Region_BO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Dibrugarh_Region_HO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Dibrugarh_Region_SO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Dibrugarh_Region_BO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded East_Region,_Bhagalpur_HO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded East_Region,_Bhagalpur_SO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded East_Region,_Bhagalpur_BO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Muzaffarpur_Region_HO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Muzaffarpur_Region_SO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Muzaffarpur_Region_BO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Raipur_Region_HO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Raipur_Region_SO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Raipur_Region_BO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Ahmedabad_HQ_Region_HO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Ahmedabad_HQ_Region_SO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Ahmedabad_HQ_Region_BO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Rajkot_Region_HO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Rajkot_Region_SO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Rajkot_Region_BO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Vadodara_Region_HO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Vadodara_Region_SO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Vadodara_Region_BO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Srinagar_HQ_Region_HO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Srinagar_HQ_Region_SO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Srinagar_HQ_Region_BO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Bengaluru_HQ_Region_HO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Bengaluru_HQ_Region_SO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Bengaluru_HQ_Region_BO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded North_Karnataka_Region_HO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded North_Karnataka_Region_SO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded North_Karnataka_Region_BO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded South_Karnataka_Region_HO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded South_Karnataka_Region_SO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded South_Karnataka_Region_BO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Calicut_Region_HO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Calicut_Region_SO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Calicut_Region_BO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Kochi_Region_HO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Kochi_Region_SO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Kochi_Region_BO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Indore_Region_HO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Indore_Region_SO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Indore_Region_BO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Jabalpur_Region_HO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Jabalpur_Region_SO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Jabalpur_Region_BO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Aurangabad_Region_HO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Aurangabad_Region_SO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Aurangabad_Region_BO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Goa-Panaji_Region_HO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Goa-Panaji_Region_SO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Goa-Panaji_Region_BO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Mumbai_Region_HO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Mumbai_Region_SO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Mumbai_Region_BO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Nagpur_Region_HO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Nagpur_Region_SO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Nagpur_Region_BO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Navi_Mumbai_Region_HO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Navi_Mumbai_Region_SO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Navi_Mumbai_Region_BO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Pune_Region_HO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Pune_Region_SO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Pune_Region_BO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded North_Eastern_Region_HO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded North_Eastern_Region_SO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded North_Eastern_Region_BO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Shillong_HQ_Region_HO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Shillong_HQ_Region_SO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Shillong_HQ_Region_BO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Berhampur_Region_HO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Berhampur_Region_SO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Berhampur_Region_BO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Sambalpur_Region_HO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Sambalpur_Region_SO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Sambalpur_Region_BO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Punjab_West_Region_HO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Punjab_West_Region_SO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Punjab_West_Region_BO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Ajmer_Region_HO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Ajmer_Region_SO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Ajmer_Region_BO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Jodhpur_Region_HO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Jodhpur_Region_SO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Jodhpur_Region_BO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Central_Region,_Trichirapalli_HO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Central_Region,_Trichirapalli_SO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Central_Region,_Trichirapalli_BO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Chennai_City_Region_HO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Chennai_City_Region_SO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Chennai_City_Region_BO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Southern_Region,_Madurai_HO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Southern_Region,_Madurai_SO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Southern_Region,_Madurai_BO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Western_Region,_Coimbatore_HO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Western_Region,_Coimbatore_SO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Western_Region,_Coimbatore_BO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Hyderabad_City_Region_HO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Hyderabad_City_Region_SO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Hyderabad_City_Region_BO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Hyderabad_Region_HO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Hyderabad_Region_SO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Hyderabad_Region_BO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Agra_Region_HO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Agra_Region_SO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Agra_Region_BO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Allahabad_Region_HO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Allahabad_Region_SO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Allahabad_Region_BO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Bareilly_Region_HO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Bareilly_Region_SO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Bareilly_Region_BO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Gorakhpur_Region_HO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Gorakhpur_Region_SO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Gorakhpur_Region_BO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Kanpur_Region_HO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Kanpur_Region_SO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Kanpur_Region_BO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Lucknow__HQ_Region_HO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Lucknow__HQ_Region_SO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Lucknow__HQ_Region_BO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Varanasi_Region_HO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Varanasi_Region_SO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Varanasi_Region_BO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Calcutta_HQ_Region_HO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Calcutta_HQ_Region_SO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Calcutta_HQ_Region_BO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Kolkata_Region_HO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Kolkata_Region_SO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded Kolkata_Region_BO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded North_Bengal_Region_HO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded North_Bengal_Region_SO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded North_Bengal_Region_BO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded South_Bengal_Region_HO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded South_Bengal_Region_SO_offices.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloaded South_Bengal_Region_BO_offices.csv


In [None]:
import pandas as pd
import requests
import time

# Read the dataset whose pincodes will be geocoded
file_path = '/content/vijay_pallu.csv'  # Change this to wherever the CSV actually lives
df = pd.read_csv(
    file_path,
    encoding='ISO-8859-1',
)

# Function to get latitude and longitude from the pincode using OSM Nominatim API
def get_lat_long_osm(pincode, timeout=10):
    """Look up coordinates for a postal code through the OSM Nominatim search API.

    Args:
        pincode: Postal code to search for; sent as the ``postalcode`` query
            parameter, restricted to country code ``IN``.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A ``(lat, lon)`` tuple taken from the first search result, or
        ``(None, None)`` when the request fails, the status is not 200, the
        body is not valid JSON, or the search returned no results.
    """
    base_url = "https://nominatim.openstreetmap.org/search"
    params = {
        "postalcode": pincode,
        "countrycodes": "IN",  # Set to India, change it as needed
        "format": "json"
    }
    try:
        # NOTE(review): Nominatim's usage policy asks for a User-Agent that
        # identifies the application; confirm whether this generic value is acceptable.
        response = requests.get(
            base_url,
            params=params,
            headers={'User-Agent': 'Mozilla/5.0'},
            timeout=timeout,  # without a timeout a stalled connection can block forever
        )
        if response.status_code != 200:
            return None, None
        # Parse the body once instead of re-decoding it for every access
        results = response.json()
    except (requests.RequestException, ValueError):
        # Network failure, timeout, or a non-JSON body: report "not found" so a
        # single bad lookup does not abort the caller's loop
        return None, None

    if not isinstance(results, list) or len(results) == 0:
        return None, None

    result = results[0]
    return result['lat'], result['lon']

# Start with empty coordinate columns
df = df.assign(Latitude=None, Longitude=None)

# Query each distinct pincode once so repeated pincodes cost no extra API calls
unique_pincodes = df['Pincode'].unique()

for pincode in unique_pincodes:
    lat, lng = get_lat_long_osm(pincode)

    # Write the coordinates into every row that carries this pincode
    rows_with_pincode = df['Pincode'] == pincode
    df.loc[rows_with_pincode, 'Latitude'] = lat
    df.loc[rows_with_pincode, 'Longitude'] = lng

    print(f"Processed Pincode: {pincode} | Latitude: {lat}, Longitude: {lng}")

    # Pause one second between lookups to stay under the API's request limit
    time.sleep(1)

# Write the dataset, now with coordinates, to a new CSV
output_file = 'kurnool_vs_vijayawada.csv'
df.to_csv(output_file, index=False)

print(f"Updated dataset saved to {output_file}")

Processed Pincode: 515004 | Latitude: 14.743604210344827, Longitude: 77.6916881275862
Processed Pincode: 515731 | Latitude: 14.8181186, Longitude: 77.59799345
Processed Pincode: 515002 | Latitude: 14.681397621052632, Longitude: 77.61501318421053
Processed Pincode: 515766 | Latitude: 14.2914167, Longitude: 77.0348529
Processed Pincode: 515415 | Latitude: 14.9489458, Longitude: 78.0290549
Processed Pincode: 515822 | Latitude: 14.8289023, Longitude: 77.2527061
Processed Pincode: 515445 | Latitude: 14.9296529, Longitude: 77.8611195
Processed Pincode: 515001 | Latitude: 14.673444204615384, Longitude: 77.60027021076924
Processed Pincode: 515741 | Latitude: 14.71633745, Longitude: 77.1308029
Processed Pincode: 515775 | Latitude: 14.946338566666666, Longitude: 77.59040526666666
Processed Pincode: 515455 | Latitude: 15.0627154, Longitude: 77.7825672
Processed Pincode: 515465 | Latitude: 14.661599, Longitude: 78.03633915
Processed Pincode: 515751 | Latitude: 14.655567033333334, Longitude: 77.412

In [None]:
import pandas as pd

# Load the CSV data
file_path = '/content/pincodes_all.csv'  # Adjust path if necessary

# Use 'latin-1' encoding to handle special characters
data = pd.read_csv(file_path, encoding='latin-1')

# Step 1: Display the column names to ensure we have the right fields to filter on
print(data.columns)


def _sample_region_offices(frame, region_name, office_types=('SO', 'BO'),
                           n=25, random_state=42):
    """Filter one region's offices by type and draw a reproducible sample.

    Args:
        frame: DataFrame with 'Region Name' and 'OfficeType' columns.
        region_name: Exact region label to keep (compared after stripping
            surrounding whitespace).
        office_types: OfficeType values to keep.
        n: Maximum number of rows to sample.
        random_state: Seed passed to ``DataFrame.sample``.

    Returns:
        ``(matches, sample)``: all matching rows, and at most ``n`` rows
        sampled from them (an empty frame with the same columns when nothing
        matched).
    """
    # .str.strip() removes leading/trailing spaces that would break exact matching
    region_match = frame['Region Name'].str.strip() == region_name
    type_match = frame['OfficeType'].str.strip().isin(list(office_types))
    matches = frame[region_match & type_match]

    if len(matches) == 0:
        return matches, pd.DataFrame(columns=matches.columns)

    # Cap n at the number of available rows: sample() raises when asked for
    # more rows than exist (without replacement)
    sample = matches.sample(n=min(n, len(matches)), random_state=random_state)
    return matches, sample


# Step 2: Filter entries from Kurnool and Vijayawada for 'SO' and 'BO' types.
# Region labels in this dataset carry a " Region" suffix ('Kurnool Region',
# 'Vijayawada Region'); matching on the bare city name selected zero rows.
# Step 3: Limit the results to at most 25 entries from each region.
kurnool_data, kurnool_sample = _sample_region_offices(data, 'Kurnool Region')
vijayawada_data, vijayawada_sample = _sample_region_offices(data, 'Vijayawada Region')

# Print the number of rows in each DataFrame to check if they are empty
print(f"Number of rows in kurnool_data: {len(kurnool_data)}")
print(f"Number of rows in vijayawada_data: {len(vijayawada_data)}")

# Step 4: Combine the two samples
combined_sample = pd.concat([kurnool_sample, vijayawada_sample])

# Step 5: Display the combined selection (up to 50 entries)
print(combined_sample)

# Step 6: Save the result as a CSV file
output_file_path = '/content/kurnool_vijayawadarrr_sample.csv'
combined_sample.to_csv(output_file_path, index=False)
print(f"CSV file saved: {output_file_path}")



Index(['Circle Name', 'Region Name', 'Division Name', 'Office Name', 'Pincode',
       'OfficeType', 'Delivery', 'District', 'StateName'],
      dtype='object')
Number of rows in kurnool_data: 0
Number of rows in vijayawada_data: 0
Empty DataFrame
Columns: [Circle Name, Region Name, Division Name, Office Name, Pincode, OfficeType, Delivery, District, StateName]
Index: []
CSV file saved: /content/kurnool_vijayawadarrr_sample.csv


In [None]:
!pip install pandas scikit-learn


