In [11]:
import pandas as pd
import numpy as np


np.random.seed(42)

# Genera dataset sintetico
sample_size = 1000

data = {
    "eta": np.random.randint(5, 75, size=sample_size),  # eta gallina (in mesi)
    "razza": np.random.choice(["Leghorn", "Rhode Island Red", "Sussex", "Plymouth Rock"], size=sample_size),  # razza gallina
    "mangime": np.random.choice(["Type A", "Type B", "Type C"], size=sample_size),  # tipo mangime
    "temperatura": np.random.uniform(20, 35, size=sample_size),  # temperatura in Celsius
    "peso": np.random.uniform(1000, 3500, size=sample_size),  # peso (in grammi)
    "q_uova_mensili": np.random.poisson(lam=5, size=sample_size) + np.random.randint(0, 3, size=sample_size)  # quantità uova mensili
}

#crea il DataFrame
df = pd.DataFrame(data)

#Salva il dataframe come file csv
df.to_csv("dataset.csv")

In [12]:
data = pd.read_csv("dataset.csv")

print(data.head())  #reperimento dataset

   Unnamed: 0  eta             razza mangime  temperatura         peso  \
0           0   56  Rhode Island Red  Type A    23.073219  1782.268486   
1           1   19            Sussex  Type A    33.167451  1677.740634   
2           2   65           Leghorn  Type B    33.193728  2494.170707   
3           3   25            Sussex  Type A    33.058676  3165.239084   
4           4   28           Leghorn  Type C    23.581943  3366.834322   

   q_uova_mensili  
0               7  
1               6  
2               3  
3               7  
4               2  


In [13]:
mangime_dict = {"Type A": 1, "Type B": 2, "Type C": 3}
data["mangime"] = data["mangime"].map(mangime_dict)
print(data.head())  #conversione feature categorica in feature numerica

   Unnamed: 0  eta             razza  mangime  temperatura         peso  \
0           0   56  Rhode Island Red        1    23.073219  1782.268486   
1           1   19            Sussex        1    33.167451  1677.740634   
2           2   65           Leghorn        2    33.193728  2494.170707   
3           3   25            Sussex        1    33.058676  3165.239084   
4           4   28           Leghorn        3    23.581943  3366.834322   

   q_uova_mensili  
0               7  
1               6  
2               3  
3               7  
4               2  


In [14]:
razza_dict = {"Leghorn": 1, "Rhode Island Red": 0.5, "Sussex": 0.7, "Plymouth Rock": 0}
data["razza"] = data["razza"].map(razza_dict)
print(data.head())  #conversione feature categorica in feature numerica


   Unnamed: 0  eta  razza  mangime  temperatura         peso  q_uova_mensili
0           0   56    0.5        1    23.073219  1782.268486               7
1           1   19    0.7        1    33.167451  1677.740634               6
2           2   65    1.0        2    33.193728  2494.170707               3
3           3   25    0.7        1    33.058676  3165.239084               7
4           4   28    1.0        3    23.581943  3366.834322               2


In [15]:
from sklearn.model_selection import train_test_split
#divisione dei dati in dati train e dati test con stratificazione
train_set, test_set = train_test_split(data, test_size=0.2, stratify=data["razza"], random_state=42)

In [16]:
#divisione variabili indipendente da variabile dipendente
x_train, y_train = train_set.drop(columns=['q_uova_mensili']), train_set['q_uova_mensili']
x_test, y_test = test_set.drop(columns=['q_uova_mensili']), test_set['q_uova_mensili']

In [17]:
#Calcolo della Varianza 
varianza = np.var(y_train)
print(f"la varianza è : {varianza}")

la varianza è : 4.6679234375


In [18]:
from sklearn.linear_model import LogisticRegression
#creazione modello di regressione multinomiale
model = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=1000)
#fitting del modello
model.fit(x_train, y_train)

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [19]:
from sklearn.metrics import classification_report, accuracy_score

y_train_pred = model.predict(x_train)
y_test_pred = model.predict(x_test)

In [20]:
from sklearn.metrics import mean_squared_error

mse_test = mean_squared_error(y_test, y_test_pred)

print(f"Mean Squared Error: {mse_test}")

Mean Squared Error: 6.27
