# 🧠 Análisis de Modelos Predictivos con Vehículos
### Stiven Herrera – UNAD 2025 – Análisis de Datos

In [None]:
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import mean_squared_error, accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier

warnings.filterwarnings('ignore')


In [None]:
# Load the dataset from a CSV file located in the current working directory
# (relative path; the file name contains a space).
df = pd.read_csv("car data.csv")
# Last expression of the cell: shows the first rows as a quick sanity check.
df.head()


## 📊 Análisis Exploratorio

In [None]:
# Structural overview: column dtypes, summary statistics and missing values.
# DataFrame.info() prints its report itself and returns None, so it is called
# bare; wrapping it in print() emitted a stray "None" line after the report.
df.info()
print(df.describe())
print(df.isnull().sum())

# Correlation is only defined for numeric columns. The categorical columns are
# label-encoded in a later cell, so presumably they are still text here;
# pandas >= 2.0 raises on such columns instead of silently dropping them.
# Selecting numeric dtypes explicitly works on old and new pandas alike.
numeric_df = df.select_dtypes(include="number")
sns.heatmap(numeric_df.corr(), annot=True)
plt.show()


## 🧹 Preprocesamiento de Datos

In [None]:
# Encode the categorical variables as integer codes (in place on df).
# One LabelEncoder per column: a single shared encoder is re-fitted on every
# fit_transform call, so only the last column's classes_ would survive and the
# other codes could not be mapped back to their original labels.
encoders = {}
for col in ['Fuel_Type', 'Seller_Type', 'Transmission']:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    encoders[col] = le  # e.g. encoders['Fuel_Type'].inverse_transform(codes)

# Split into features and target. 'Car_Name' is dropped from the features
# together with the target column itself.
X = df.drop(['Selling_Price', 'Car_Name'], axis=1)
y = df['Selling_Price']


## 📈 Modelo de Regresión Lineal

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# repeatable. These split variables are reused by later cells.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Fit on the training split (fit() returns the estimator itself), then
# predict prices for the held-out rows.
lr_model = LinearRegression().fit(X_train, y_train)
y_pred = lr_model.predict(X_test)

# Report the mean squared error on the held-out rows.
lr_mse = mean_squared_error(y_test, y_pred)
print("MSE:", lr_mse)

# Actual vs. predicted price for each held-out row.
plt.scatter(y_test, y_pred)
plt.title("Regresi√≥n Lineal - Predicci√≥n de precios")
plt.xlabel("Precio real")
plt.ylabel("Precio predicho")
plt.show()


## 📊 Modelo de Regresión Logística

In [None]:
# Binary classification target: 1 when Selling_Price is greater than 5, else 0.
y_class = (df['Selling_Price'] > 5).astype(int)
X_train2, X_test2, y_train2, y_test2 = train_test_split(X, y_class, test_size=0.2, random_state=42)

# Standardize the features before fitting. The default lbfgs solver can stop
# at its iteration limit on unscaled inputs, and the resulting
# ConvergenceWarning is hidden by the notebook-wide warnings filter.
# NOTE(review): the features presumably sit on very different scales
# (e.g. year vs. distance driven) - confirm against df.describe().
# The scaler is fitted on the training split only, so no test-set statistics
# leak into the model.
scaler = StandardScaler().fit(X_train2)

log_model = LogisticRegression(max_iter=1000)
log_model.fit(scaler.transform(X_train2), y_train2)
y_pred2 = log_model.predict(scaler.transform(X_test2))

print(classification_report(y_test2, y_pred2))
# fmt="d": the confusion-matrix cells are integer counts, so annotate them as
# integers rather than with the default '.2g' float format.
sns.heatmap(confusion_matrix(y_test2, y_pred2), annot=True, fmt="d")
plt.title("Matriz de Confusi√≥n - Regresi√≥n Log√≠stica")
plt.show()


## 🌳 Modelo Árbol de Decisión (Regresión)

In [None]:
# Decision-tree regressor trained on the same train/test split created in the
# linear-regression cell (X_train / X_test / y_train / y_test).
# Seeded so the fitted tree and its MSE are reproducible across runs: without
# random_state, ties between equally good splits are broken randomly. The seed
# matches the random_state=42 used for the data splits.
tree_model = DecisionTreeRegressor(random_state=42)
tree_model.fit(X_train, y_train)
y_pred_tree = tree_model.predict(X_test)

# Mean squared error on the held-out rows.
print("MSE √Årbol:", mean_squared_error(y_test, y_pred_tree))

# Actual vs. predicted price for each held-out row.
plt.scatter(y_test, y_pred_tree)
plt.xlabel("Precio real")
plt.ylabel("Precio predicho")
plt.title("√Årbol de Decisi√≥n - Predicci√≥n de precios")
plt.show()


## ✅ Conclusión
- Se construyeron modelos de Regresión Lineal, Logística y Árbol de Decisión.
- El modelo de árbol de decisión puede adaptarse mejor a relaciones no lineales.
- Este análisis es útil para predecir precios de vehículos en función de características técnicas y de mercado.

---
📅 Proyecto realizado por **Stiven Herrera**
📚 Universidad Nacional Abierta y a Distancia – UNAD
🗓️ 2025-04-05