# Sales of Riding Mowers: Scatter Plots & Classification

This notebook analyzes the RidingMowers dataset to:
- Create a scatter plot of income vs. lot size
- Visualize ownership patterns (owners vs. non-owners)
- Build and evaluate a logistic regression classifier and plot its decision boundary


In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report

# Apply seaborn's theme with the 'whitegrid' axes style to the plots below
sns.set(style='whitegrid')

In [None]:
# Load dataset
# Use the raw-content URL: the github.com/<user>/<repo>/blob/... address serves
# the HTML viewer page, which pd.read_csv cannot parse as the CSV data.
DATA_URL = 'https://raw.githubusercontent.com/TABxSAID/Datasets/master/RidingMowers.csv'
df = pd.read_csv(DATA_URL)

# Display first few rows
df.head()

In [None]:
# Check dataset info
# DataFrame.info() prints a summary of the columns: names, non-null counts, dtypes
df.info()

In [None]:
# Scatter plot of the two features; colour and marker shape both encode Ownership
fig, ax = plt.subplots(figsize=(8, 6))

sns.scatterplot(
    data=df,
    x='Income',
    y='Lot_Size',
    hue='Ownership',
    style='Ownership',
    s=100,
    ax=ax,
)

ax.set_title('Scatter Plot of Income vs Lot Size')
ax.set_xlabel('Income ($1000s)')
ax.set_ylabel('Lot Size (1000 sq ft)')
plt.show()

In [None]:
# Convert Ownership to binary (Nonowner -> 0, Owner -> 1).
# Series.map() with a dict turns every value that is not a key into NaN, so
# re-running this cell on the already-encoded column would wipe out the target.
# Looking each label up with a fallback to its current value keeps the cell
# safe to re-run.
OWNERSHIP_CODES = {'Nonowner': 0, 'Owner': 1}
df['Ownership'] = df['Ownership'].map(
    lambda label: OWNERSHIP_CODES.get(label, label)
)

# Fail here, with a clear message, if the column holds anything other than 0/1
unexpected = set(df['Ownership'].unique()) - set(OWNERSHIP_CODES.values())
if unexpected:
    raise ValueError(
        f'Unexpected Ownership values: {sorted(unexpected, key=str)}'
    )

# Define features and target
X = df[['Income', 'Lot_Size']]
y = df['Ownership']

In [None]:
# Hold out 30% of the rows for evaluation (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Fit a logistic regression classifier on the training rows
model = LogisticRegression().fit(X_train, y_train)

# Predict ownership for the held-out rows
y_pred = model.predict(X_test)

# Report how the predictions compare with the true labels
print('Confusion Matrix:', confusion_matrix(y_test, y_pred), sep='\n')
print('\nClassification Report:', classification_report(y_test, y_pred), sep='\n')

In [None]:
# Plot Decision Boundary
# Pad the observed feature ranges by one unit so no point sits on the plot edge
x_min, x_max = X['Income'].min() - 1, X['Income'].max() + 1
y_min, y_max = X['Lot_Size'].min() - 1, X['Lot_Size'].max() + 1

# 100 x 100 grid of (Income, Lot_Size) points covering the plot area
xx, yy = np.meshgrid(
    np.linspace(x_min, x_max, 100),
    np.linspace(y_min, y_max, 100)
)

# The model was fitted on a DataFrame, so predict on a DataFrame with the same
# column names; passing a bare array makes scikit-learn warn that X does not
# have valid feature names.
grid = pd.DataFrame(
    np.c_[xx.ravel(), yy.ravel()],
    columns=X.columns
)
Z = model.predict(grid)
Z = Z.reshape(xx.shape)

# Shade each grid point by its predicted class, then overlay the observations
plt.figure(figsize=(8,6))
plt.contourf(xx, yy, Z, alpha=0.3, cmap='coolwarm')

sns.scatterplot(
    data=df,
    x='Income',
    y='Lot_Size',
    hue='Ownership',
    palette='coolwarm',
    s=100
)

plt.title('Logistic Regression Decision Boundary')
plt.xlabel('Income ($1000s)')
plt.ylabel('Lot Size (1000 sq ft)')
plt.show()