# Especialização em Inteligência Artificial

**Aprendizado de Máquina - Web Conf 1: Exemplos práticos**

Código de exemplo desenvolvido pelo docente [Adriano Rivolli](mailto:rivolli@utfpr.edu.br)

*O código a seguir ilustra como carregar um dataset externo, usar regressores e preencher valores ausentes*

## Leitura do dataset

In [None]:
import pandas as pd
from google.colab import drive
# Mount Google Drive so the dataset stored there becomes readable from the notebook.
drive.mount("/content/drive", force_remount=True)

# Location of the CSV file inside the mounted Drive.
dataset_path = "/content/drive/MyDrive/UTFPR/aulas/machine learning (especialização)/codigos/webconf/cloud.csv"

# Load the CSV into a DataFrame; the trailing expression displays it as the cell output.
df = pd.read_csv(dataset_path)
df

In [None]:
from sklearn.model_selection import train_test_split

# Separate the 'Target' column (what is predicted) from the remaining feature columns.
y = df['Target']
X = df.drop(columns='Target')

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Preview the first rows of the training features.
X_train.head()

## Regressores

In [None]:
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor

from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt

### KNN Regressor

In [None]:
# k-nearest-neighbours regressor using 3 neighbours with distance-based weighting;
# fit() returns the estimator itself, so it can be chained onto the constructor.
knn = KNeighborsRegressor(weights='distance', n_neighbors=3).fit(X_train, y_train)

# Predict the held-out rows; the trailing expression displays the predictions.
y_pred = knn.predict(X_test)
y_pred

In [None]:
# Score the current predictions against the held-out targets.
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)

# Report both metrics, one per line.
for label, value in (("MSE:", mse), ("R2:", r2)):
    print(label, value)

In [None]:
def plot_predictions_vs_actual(actual, predictions, title):
    """Scatter predicted values against actual values with a y = x reference line.

    Points that fall on the dashed red line are perfect predictions.

    Args:
        actual: Observed target values, plotted on the x axis.
        predictions: Predicted values aligned element-wise with ``actual``,
            plotted on the y axis.
        title: Text appended to the figure title (e.g. the model name).
    """
    plt.figure(figsize=(8, 6))
    plt.scatter(actual, predictions, color='blue', label='Predições vs. Real')

    # Draw the identity line once, between the extremes of the actual values.
    # Plotting every (actual, actual) pair in arbitrary order retraces the same
    # segment back and forth, so the dashed pattern overlaps itself.
    if len(actual) > 0:
        lower, upper = min(actual), max(actual)
        plt.plot([lower, upper], [lower, upper], color='red', linestyle='--', label='Linha de Referência')

    plt.title('Previstos vs. Reais: '+title)
    plt.xlabel('Valores Reais')
    plt.ylabel('Valores Previstos')
    plt.legend()
    plt.grid(True)
    plt.show()

# Visualise the KNN predictions computed above.
plot_predictions_vs_actual(y_test, y_pred, 'KNN')

### Árvore de regressão

In [None]:
# Regression tree with a fixed seed so repeated runs build the same tree.
dt = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)

# Predict the held-out rows; the trailing expression displays the predictions.
y_pred = dt.predict(X_test)
y_pred

In [None]:
# Score the decision-tree predictions against the held-out targets.
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

# Report both metrics, then show the predicted-vs-actual scatter plot.
print("MSE:", mse)
print("R2:", r2)
plot_predictions_vs_actual(actual=y_test, predictions=y_pred, title='DTR')

### SVR

In [None]:
# Support vector regressor with the library's default hyper-parameters;
# fit() returns the estimator itself, so it can be chained onto the constructor.
svr = SVR().fit(X_train, y_train)

# Predict the held-out rows; the trailing expression displays the predictions.
y_pred = svr.predict(X_test)
y_pred

In [None]:
# Score the SVR predictions against the held-out targets.
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)

# Report both metrics, one per line.
for label, value in (("MSE:", mse), ("R2:", r2)):
    print(label, value)

# Show the predicted-vs-actual scatter plot for this model.
plot_predictions_vs_actual(y_test, y_pred, title='SVR')

### Random Forest Regressor

In [None]:
# Random forest of 100 trees with a fixed seed so repeated runs give the same model.
rf = RandomForestRegressor(random_state=42, n_estimators=100).fit(X_train, y_train)

# Predict the held-out rows; the trailing expression displays the predictions.
y_pred = rf.predict(X_test)
y_pred

In [None]:
# Score the random-forest predictions against the held-out targets.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("MSE:", mse)
print("R2:", r2)

# Title the plot after the random forest regressor; the previous 'SVR' label was
# copied from the SVR cell and mislabelled this figure.
plot_predictions_vs_actual(y_test, y_pred, 'RFR')

## Estimando valores ausentes

In [None]:
import numpy as np

def add_missing_values_to_column(df, column_name, missing_percentage, random_state=None):
    """Return a copy of ``df`` with a random fraction of one column set to NaN.

    The input DataFrame is never modified.

    Args:
        df: Source DataFrame.
        column_name: Existing column whose values will be blanked out.
        missing_percentage: Fraction of rows, between 0 and 1, to turn into
            NaN. The number of affected rows is ``int(len(df) * fraction)``.
        random_state: Optional seed for a reproducible selection. When left
            as ``None`` the global NumPy random state is used.

    Returns:
        A new DataFrame equal to ``df`` except for the blanked entries.

    Raises:
        KeyError: If ``column_name`` is not a column of ``df``.
        ValueError: If ``missing_percentage`` is outside the range [0, 1].
    """
    # Assigning through .loc with an unknown column would silently create a
    # brand-new column instead of reporting the mistake.
    if column_name not in df.columns:
        raise KeyError(f"column {column_name!r} not found in DataFrame")
    if not 0 <= missing_percentage <= 1:
        raise ValueError("missing_percentage must be between 0 and 1")

    df_copy = df.copy()
    num_instances_with_missing_values = int(len(df) * missing_percentage)
    if num_instances_with_missing_values == 0:
        return df_copy

    rng = np.random if random_state is None else np.random.RandomState(random_state)

    # Sample row positions rather than index labels, so duplicated labels
    # cannot blank out more rows than requested.
    positions = rng.choice(len(df), size=num_instances_with_missing_values, replace=False)
    missing_mask = np.zeros(len(df), dtype=bool)
    missing_mask[positions] = True

    # Series.mask replaces the flagged entries with NaN and takes care of any
    # dtype upcast (e.g. integer columns) itself.
    df_copy[column_name] = df_copy[column_name].mask(missing_mask)

    return df_copy

# Column that will receive artificial missing values.
attribute = 'period'

# Blank out 10% of that column in a copy of the dataset.
mdf = add_missing_values_to_column(df, attribute, missing_percentage=0.1)

# Display only the rows that contain at least one missing value.
rows_with_missing = mdf.isna().any(axis=1)
mdf.loc[rows_with_missing]

In [None]:
# The blanked-out column becomes the prediction target; 'Target' is left out of the features as well.
y = mdf[attribute]
X = mdf.drop(columns=[attribute, 'Target'])

# Rows that kept their value are used for training, rows that lost it for testing.
is_missing = y.isna()
X_train, y_train = X.loc[~is_missing], y.loc[~is_missing]
X_test = X.loc[is_missing]

# The ground truth for the blanked rows is read from the untouched original DataFrame.
y_test = df.loc[is_missing, attribute]

X_train.head()

In [None]:
# Random forest that estimates the blanked-out values of `attribute` from the
# remaining features. The seed is fixed, like the other estimators in this
# notebook, so the fitted model is repeatable for a given set of missing rows.
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Compare the estimates with the true values that were removed.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("MSE:", mse)
print("R2:", r2)

plot_predictions_vs_actual(y_test, y_pred, attribute)