# ABG Motors - Car Purchase Prediction

This notebook was created as part of a capstone project to predict car purchases in India using a logistic regression model trained on Japanese customer data.

In [None]:
import pandas as pd

# Read the two customer workbooks: the Japanese data the model is trained on
# and the Indian data it is later applied to.
jpn_df = pd.read_excel('JPN Data.xlsx')
ind_df = pd.read_excel('IN_Data.xlsx')

# Preview the first rows of each frame under its own heading.
for heading, frame in (
    ('Japanese Dataset:', jpn_df),
    ('Indian Dataset:', ind_df),
):
    print(heading)
    display(frame.head())

In [None]:
# Prepare the Japanese training frame: remove the identifier column, encode
# GENDER numerically, report missing values, and split into features/target.
#
# The cleaning steps are guarded so this cell can be re-run on an already
# cleaned `jpn_df` without raising KeyError on the dropped 'ID' column or
# turning the encoded GENDER values into NaN.
jpn_df = jpn_df.drop(columns='ID', errors='ignore')

# Series.map yields NaN for every value that is not a key of the mapping, so
# the encoding is applied only while the column is still non-numeric.
if not pd.api.types.is_numeric_dtype(jpn_df['GENDER']):
    jpn_df['GENDER'] = jpn_df['GENDER'].map({'M': 0, 'F': 1})

print('Missing values in Japanese dataset:')
print(jpn_df.isnull().sum())

# Every remaining column except the PURCHASE label is used as a model input.
X_jpn = jpn_df.drop(columns='PURCHASE')
y_jpn = jpn_df['PURCHASE']

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

# Hold out 20% of the Japanese data for evaluation; the fixed random_state
# keeps the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_jpn,
    y_jpn,
    test_size=0.2,
    random_state=42,
)

# Fit the classifier on the training part and predict the held-out part.
# NOTE(review): features are passed unscaled; if the solver reports a
# ConvergenceWarning, consider standardizing the inputs -- confirm.
model = LogisticRegression(max_iter=200).fit(X_train, y_train)
y_pred = model.predict(X_test)

# Evaluation summary on the held-out rows.
print('Confusion Matrix:', confusion_matrix(y_test, y_pred), sep='\n')
print('\nClassification Report:', classification_report(y_test, y_pred), sep='\n')

# One coefficient per input column, taken from the first row of coef_.
coefficients = pd.DataFrame(
    {'Feature': X_jpn.columns, 'Coefficient': model.coef_[0]}
)
print('\nModel Coefficients:')
display(coefficients)

In [None]:
import numpy as np
from datetime import datetime

# Prepare the Indian frame for scoring: remove the identifier column, encode
# GENDER with the same M=0 / F=1 mapping used for the Japanese data, and
# replace the DT_MAINT date by a numeric AGE_CAR column.
#
# Each step is guarded so the cell can be re-run on an already processed
# `ind_df` without raising KeyError on 'ID' / 'DT_MAINT' or re-mapping the
# encoded GENDER values to NaN.
ind_df = ind_df.drop(columns='ID', errors='ignore')

# Series.map yields NaN for every value that is not a key of the mapping, so
# the encoding is applied only while the column is still non-numeric.
if not pd.api.types.is_numeric_dtype(ind_df['GENDER']):
    ind_df['GENDER'] = ind_df['GENDER'].map({'M': 0, 'F': 1})

# Fixed reference date, so AGE_CAR does not change from one run to the next.
today = pd.to_datetime('2025-07-08')

if 'DT_MAINT' in ind_df.columns:
    # errors='coerce' converts unparseable dates to NaT instead of raising.
    maint_dates = pd.to_datetime(ind_df['DT_MAINT'], errors='coerce')

    # Surface the coerced/missing dates here rather than leaving them to be
    # discovered as NaN inputs at prediction time.
    n_missing_dates = int(maint_dates.isna().sum())
    if n_missing_dates:
        print(
            f'Warning: {n_missing_dates} row(s) have a missing or unparseable '
            'DT_MAINT; AGE_CAR is NaN for these rows.'
        )

    # Number of whole 30-day periods between DT_MAINT and the reference date.
    # NOTE(review): assumes AGE_CAR in the training data uses the same unit
    # -- confirm against the Japanese dataset.
    ind_df['AGE_CAR'] = (today - maint_dates).dt.days // 30
    ind_df = ind_df.drop(columns='DT_MAINT')

display(ind_df.head())

In [None]:
# Score the Indian customers with the model fitted on the Japanese data.
#
# The input columns are taken from the training matrix instead of a
# hand-written list, so the frame passed to predict() always has the same
# column names in the same order as the frame the model was fitted on.
features = list(X_jpn.columns)
ind_df['PREDICTED_PURCHASE'] = model.predict(ind_df[features])

# Summing the predictions gives the number of predicted buyers.
# NOTE(review): this assumes PURCHASE is coded 0/1 -- confirm.
potential_buyers = ind_df['PREDICTED_PURCHASE'].sum()
total_customers = len(ind_df)
print(f'Predicted buyers in Indian dataset: {potential_buyers} out of {total_customers}')

# Persist the scored customers, without the index, next to the input files.
ind_df.to_excel('IN_Data_With_Predictions.xlsx', index=False)