## Clasificación

In [None]:
import os
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

import numpy as np


# Load the customer-churn CSV from ./data under the current working directory.
# os.path.join uses the platform's path separator, so the notebook is no longer
# tied to Windows-style '\\' paths.
ruta = os.getcwd()
df = pd.read_csv(os.path.join(ruta, 'data', 'customer_churn.csv'), index_col=0)

# Keep the seven feature columns used below plus the target `Attrition_Flag`.
df = df[['Total_Revolving_Bal',
         'Avg_Open_To_Buy', 'Total_Amt_Chng_Q4_Q1', 'Total_Trans_Amt',
         'Total_Trans_Ct', 'Total_Ct_Chng_Q4_Q1', 'Avg_Utilization_Ratio',
         'Attrition_Flag']]

df

In [None]:
# Scatter of revolving balance against transaction amount, coloured by the target.
fig, ax = plt.subplots(figsize=(8, 6))

sns.scatterplot(
    data=df,
    x='Total_Revolving_Bal',
    y='Total_Trans_Amt',
    hue='Attrition_Flag',
    palette='Set1',
    ax=ax,
)

ax.set_title('Scatterplot de dos variables numéricas con hue binario')
ax.set_xlabel('Total_Revolving_Bal 1')
ax.set_ylabel('Total_Trans_Amt 2')
ax.legend(title='Attrition_Flag')
ax.grid(True)
plt.show()

In [None]:
# Train/test split
from sklearn.model_selection import train_test_split, RandomizedSearchCV, cross_val_score

# Target is `Attrition_Flag`; every other column is a feature.
y = df['Attrition_Flag']
X = df.drop(columns=['Attrition_Flag'])

# Hold out 25% of the rows; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=4,
)

In [None]:
# Model training

from sklearn.neighbors import KNeighborsClassifier

# 3-nearest-neighbours classifier with the Euclidean metric and
# distance-based neighbour weights.
knc = KNeighborsClassifier(
    n_neighbors=3,
    metric='euclidean',
    weights='distance',
)

# NOTE(review): the features are passed unscaled here, whereas the regression
# section standardizes them first — confirm whether that is intentional.
knc.fit(X_train, y_train)

In [None]:
# Predictions for the held-out test features

y_pred = knc.predict(X_test)

In [None]:

# Metrics

from sklearn.metrics import accuracy_score

# Accuracy of the test-set predictions.
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
acc

In [None]:
# Choose k by mean 5-fold cross-validated accuracy.
# The search uses the training split only, so the held-out test rows never
# influence the choice of k (the hyperparameter search further down is also
# fitted on X_train / y_train).
k_range = range(1, 20)
accuracias = []
for i in k_range:
    knn = KNeighborsClassifier(n_neighbors=i, metric='euclidean', weights='distance')
    scores = cross_val_score(knn, X_train, y_train, cv=5, scoring='accuracy')
    accuracias.append(scores.mean())

# np.argmax returns the first maximum, so ties resolve to the smallest k.
best_k = k_range[np.argmax(accuracias)]
print(f"El mejor valor de k es: {best_k}")


In [None]:
# Mean cross-validated score for each k, with the selected k marked in red.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(k_range, accuracias, marker='o', linestyle='-', color='b')
ax.set_xlabel('Valor de K')
ax.set_ylabel('Precisión Promedio')
ax.set_title('Precisión vs. Valor de K')
ax.set_xticks(k_range)
ax.grid(True)
ax.axvline(x=best_k, color='r', linestyle='--', label=f'Mejor K: {best_k}')
ax.legend()
plt.show()

In [None]:
# Hyperparameter search

# Candidate values sampled by the randomized search. `n_neighbors` is not
# searched; it is fixed on the estimator below.
# NOTE(review): `p` is the Minkowski power parameter, so it presumably has no
# effect when metric is 'euclidean' or 'manhattan' — confirm against the
# scikit-learn docs and prune redundant combinations if so.
param_grid = {
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan', 'minkowski'],
    'p': [1, 2, 3]
}
# NOTE(review): 11 is hard-coded; presumably the k found by the cross-validation
# cell above — confirm, or pass `best_k` instead.
knc = KNeighborsClassifier(n_neighbors=11)

grid_search = RandomizedSearchCV(
    estimator=knc,
    param_distributions=param_grid,
    cv=5,
    scoring='accuracy',
    verbose=1,
    n_jobs=1,
    # Fixed seed so the sampled candidates (and therefore the reported best
    # parameters) are the same on every run; matches the split's seed.
    random_state=4,
)

grid_search.fit(X_train, y_train)

In [None]:
# Retrieve the best configuration found by the search

best_model = grid_search.best_estimator_
best_score = grid_search.best_score_
best_params = grid_search.best_params_

print(best_params, best_score, best_model)

## Regresión

In [None]:
import os
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

import numpy as np
# Load the housing-price CSV from ./data under the current working directory.
# os.path.join uses the platform's path separator instead of hard-coded '\\'.
ruta = os.getcwd()
df = pd.read_csv(os.path.join(ruta, 'data', 'precio_vivienda.csv'), index_col=0)
# Move the index back into a regular column (assignment instead of inplace, so
# re-running the cell stays predictable).
df = df.reset_index()
# Keep the target `precio` and the four predictors used below.
df = df[['precio', 'm2', 'antiguedad', 'renta_zona',
         'poblacion_zona']]
df

In [None]:
# Display the column labels of `df`.
df.columns

In [None]:
# plt.figure(figsize=(8, 6))

# sns.scatterplot(data=df,x='renta_zona', y='antiguedad', hue='precio', palette='Set1')

# plt.title('Scatterplot de dos variables numéricas con hue binario')
# plt.xlabel('renta_zona 1')
# plt.ylabel('poblacion_zona 2')
# plt.legend(title='precio')
# plt.grid(True)
# plt.show()

In [None]:
# Train/test split
from sklearn.model_selection import train_test_split, RandomizedSearchCV, cross_val_score

# Target is `precio`; every other column is a feature.
y = df['precio']
X = df.drop(columns=['precio'])

# Hold out 20% of the rows; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=4,
)

In [None]:
# Scaling

from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply the same fitted
# transform to both splits.
scaler = StandardScaler()
scaler.fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Display the scaled training features produced by the scaler.
X_train_scaled

In [None]:
# Model training

from sklearn.neighbors import KNeighborsRegressor

# 3-nearest-neighbours regressor with the Euclidean metric and
# distance-based neighbour weights.
knc = KNeighborsRegressor(
    n_neighbors=3,
    metric='euclidean',
    weights='distance',
)

# Fitted on the scaled training features.
knc.fit(X_train_scaled, y_train)

In [None]:
# Predictions
# The regressor was fitted on X_train_scaled, so the test features must go
# through the same scaler; predicting on the raw X_test would compare
# distances across two different feature scales.

y_pred = knc.predict(X_test_scaled)

# Metrics

from sklearn.metrics import root_mean_squared_error

# Root mean squared error of the test-set predictions.
rmse = root_mean_squared_error(y_test, y_pred)
acc = rmse  # legacy name kept so any later cell reading `acc` still works
rmse

In [None]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Choose k by mean 5-fold cross-validated negative MSE.
# Scaler and regressor are wrapped in a pipeline so each CV fold standardizes
# with its own training statistics, matching the scaled features the model is
# trained on elsewhere. Only the training split is used, so the held-out test
# rows do not influence the choice of k.
k_range = range(1, 100)
mse = []  # holds *negative* MSE values (higher is better)
for i in k_range:
    knn = make_pipeline(
        StandardScaler(),
        KNeighborsRegressor(n_neighbors=i, metric='euclidean', weights='distance'),
    )
    scores = cross_val_score(knn, X_train, y_train, cv=5, scoring='neg_mean_squared_error')
    mse.append(scores.mean())

# The scores are negated errors, so the largest value marks the lowest MSE.
best_k = k_range[np.argmax(mse)]
print(f"El mejor valor de k es: {best_k}")

plt.figure(figsize=(10, 6))
plt.plot(k_range, mse, marker='o', linestyle='-', color='b')
plt.xlabel('Valor de K')
# Labels now name the plotted quantity (negative MSE, not accuracy/precision).
plt.ylabel('MSE negativo promedio')
plt.title('MSE negativo vs. Valor de K')
# One tick every 5 values of k; 99 individual ticks overlap and are unreadable.
plt.xticks(k_range[::5])
plt.grid(True)
plt.axvline(x=best_k, color='r', linestyle='--', label=f'Mejor K: {best_k}')
plt.legend()
plt.show()

In [None]:
# Hyperparameter search

# Candidate values sampled by the randomized search. `n_neighbors` is not
# searched; it is fixed on the estimator below.
# NOTE(review): `p` is the Minkowski power parameter, so it presumably has no
# effect when metric is 'euclidean' or 'manhattan' — confirm against the
# scikit-learn docs and prune redundant combinations if so.
param_grid = {
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan', 'minkowski'],
    'p': [1, 2, 3]
}
# NOTE(review): 93 is hard-coded; presumably the k found by the cross-validation
# cell above — confirm, or pass `best_k` instead.
knc = KNeighborsRegressor(n_neighbors=93)

grid_search = RandomizedSearchCV(
    estimator=knc,
    param_distributions=param_grid,
    cv=5,
    scoring='neg_mean_squared_error',
    verbose=1,
    n_jobs=1,
    # Fixed seed so the sampled candidates (and therefore the reported best
    # parameters) are the same on every run; matches the split's seed.
    random_state=4,
)

grid_search.fit(X_train_scaled, y_train)

In [None]:
# Retrieve the best configuration found by the search
# (the score is a negative MSE, as set by the search's `scoring`).

best_model = grid_search.best_estimator_
best_score = grid_search.best_score_
best_params = grid_search.best_params_

print(best_params, best_score, best_model)