# Logistik regressiya orqali GYM atletlarini darajasini aniqlash

In [1]:
# Import the required libraries

import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns 
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import accuracy_score



In [2]:
# Read the weather dataset from the CSV file in the working directory,
# then preview its first rows to confirm that it loaded correctly.

csv_path = 'weather_forecast_data.csv'
data_f = pd.read_csv(csv_path)
data_f.head()

Unnamed: 0,Temperature,Humidity,Wind_Speed,Cloud_Cover,Pressure,Rain
0,23.720338,89.592641,7.335604,50.501694,1032.378759,rain
1,27.879734,46.489704,5.952484,4.990053,992.614189,no rain
2,25.069084,83.072843,1.371992,14.855784,1007.23162,no rain
3,23.62208,74.367758,7.050551,67.255282,982.632013,rain
4,20.59137,96.858822,4.643921,47.676444,980.825142,no rain


In [3]:
# Summarise the dataset: column names, non-null counts and dtypes

data_f.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2500 entries, 0 to 2499
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Temperature  2497 non-null   float64
 1   Humidity     2494 non-null   float64
 2   Wind_Speed   2495 non-null   float64
 3   Cloud_Cover  2496 non-null   float64
 4   Pressure     2496 non-null   float64
 5   Rain         2498 non-null   object 
dtypes: float64(5), object(1)
memory usage: 117.3+ KB


In [4]:
# Count the missing (null) values in every column of the dataset

data_f.isna().sum()

Temperature    3
Humidity       6
Wind_Speed     5
Cloud_Cover    4
Pressure       4
Rain           2
dtype: int64

In [5]:
# Isolate only the columns that actually contain missing values and report them.

null_counts = data_f.isnull().sum()
missing_columns = null_counts.loc[null_counts.gt(0)]
if missing_columns.empty:
    # Nothing is missing anywhere in the frame.
    print('Bundaty ustunlar mavjud emas')
else:
    print('Tushib qolgan elementlari bor ustunlar:')
    print(missing_columns)







Tushib qolgan elementlari bor ustunlar:
Temperature    3
Humidity       6
Wind_Speed     5
Cloud_Cover    4
Pressure       4
Rain           2
dtype: int64


In [17]:
# Fill in the missing values, then convert the target labels to integers.
# Numeric columns are filled with their mean, any other column with its most
# frequent value. The target is encoded with an explicit mapping
# ('no rain' -> 0, 'rain' -> 1), not with LabelEncoder.
# NOTE(review): the fill statistics are computed on the whole dataset before any
# train/test split, and missing target labels are imputed rather than dropped —
# confirm that this is acceptable for the evaluation.

for column in data_f.columns:
    if data_f[column].isnull().any():
        if pd.api.types.is_numeric_dtype(data_f[column]):
            fill_value = data_f[column].mean()
        else:
            fill_value = data_f[column].mode()[0]
        # Assign the result back to the frame: fillna(inplace=True) on a column
        # selection mutates an intermediate object, which pandas warns about and
        # which leaves the frame unchanged under Copy-on-Write.
        data_f[column] = data_f[column].fillna(fill_value)

data_f.info()

# Encode only while the column still holds text labels, so that re-running this
# cell does not map the already-encoded 0/1 values to NaN.
if not pd.api.types.is_numeric_dtype(data_f['Rain']):
    data_f['Rain'] = data_f['Rain'].map({'no rain': 0, 'rain': 1})



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2500 entries, 0 to 2499
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Temperature  2500 non-null   float64
 1   Humidity     2500 non-null   float64
 2   Wind_Speed   2500 non-null   float64
 3   Cloud_Cover  2500 non-null   float64
 4   Pressure     2500 non-null   float64
 5   Rain         2500 non-null   object 
dtypes: float64(5), object(1)
memory usage: 117.3+ KB


In [21]:
# Confirm that no missing values remain in any column after the imputation step.

data_f.isnull().sum()


Temperature    0
Humidity       0
Wind_Speed     0
Cloud_Cover    0
Pressure       0
Rain           0
dtype: int64

In [22]:
# With the dataset cleaned, separate the target column from the feature columns.

y = data_f['Rain']
x = data_f.drop(columns='Rain')

In [23]:
# Split the data 0.70 : 0.15 : 0.15 into training, validation and test sets:
# 30 % is held out first and then halved into the validation and test parts.
# NOTE(review): the split is not stratified on y — confirm the class balance is
# similar across the three parts.

x_train, x_temp, y_train, y_temp = train_test_split(
    x, y, test_size=0.3, random_state=42
)
x_val, x_test, y_val, y_test = train_test_split(
    x_temp, y_temp, test_size=0.5, random_state=42
)


In [24]:
# Number of rows that ended up in the test split
len(x_test)

375

In [25]:
# Train a baseline logistic regression model on the unscaled training features

model=LogisticRegression(max_iter=1000,solver='lbfgs')
model.fit(x_train,y_train)


In [26]:
# The previous fit suggested either raising the iteration limit or scaling the
# data; scaling is applied here.
# The scaler learns its statistics from the training features only and is then
# applied unchanged to the training, validation and test features.
scaler = StandardScaler().fit(x_train)

x_train_scaled, x_val_scaled, x_test_scaled = (
    scaler.transform(split) for split in (x_train, x_val, x_test)
)


In [27]:
# Train the logistic regression model again, this time on the scaled training features

model=LogisticRegression(max_iter=1000,solver='lbfgs')
model.fit(x_train_scaled,y_train)


In [28]:
# Predict the class labels for the scaled test features
y_pred=model.predict(x_test_scaled)

# Display the predicted labels
y_pred

array([0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1,
       1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,

In [29]:
# Measure how accurately the model classifies the held-out test set.
# The score is stored under its own name: assigning it to `accuracy_score`
# would replace the imported function with a float, so re-running this cell
# (or calling accuracy_score again in a later cell) would raise a TypeError.
test_accuracy = accuracy_score(y_test, y_pred)

test_accuracy

0.9146666666666666

In [30]:
# The model reaches roughly 91% accuracy on the test set.
# To check a prediction by hand, take one row from x_val (data the model was not
# fitted on) together with its true label.
# display() is used because a notebook cell renders only its last bare
# expression; a bare `x_val.head(1)` placed above `y_val.head(1)` is never shown.
display(x_val.head(1), y_val.head(1))

2489    0
Name: Rain, dtype: int64

In [31]:
# Use the first validation row as a stand-in for a new observation.
new_data = x_val.iloc[:1]

# Scale it with the already-fitted scaler, then classify it with the trained model.
new_data_scaled = scaler.transform(new_data)
new_prediction = model.predict(new_data_scaled)

# Show the predicted label.
new_prediction


array([0])

In [32]:
# Separate the cleaned dataset into inputs (x) and output (y) again before the
# hyperparameter search; these are the same statements as in the earlier
# feature/target cell.

x = data_f.drop('Rain', axis=1)
y = data_f['Rain']

In [33]:

# Recreate the 70/15/15 train / validation / test split; test sizes and
# random_state are the same as in the earlier split cell.
x_train, x_temp, y_train, y_temp = train_test_split(x, y, test_size=0.3, random_state=42)
x_val, x_test, y_val, y_test = train_test_split(x_temp, y_temp, test_size=0.5, random_state=42)

In [34]:

# Refit the scaler on the training features and apply it to the test features.
# NOTE(review): `scaler` is the object created in an earlier cell, and
# x_val_scaled is not recomputed here — confirm that is intended.
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)


In [35]:
# Define the base logistic regression estimator (default hyperparameters);
# this rebinds `model`, which previously held the fitted classifier.
model = LogisticRegression()

In [36]:
# Hyperparameter grid explored by the search:
#   C        - regularization strength values to try
#   solver   - optimization algorithms to try
#   max_iter - iteration limits for the optimizer
param_grid = dict(
    C=[0.1, 1, 10, 100],
    solver=['lbfgs', 'liblinear'],
    max_iter=[100, 200, 300],
)

# 5-fold cross-validated search over every combination, using all CPU cores.
grid_search = GridSearchCV(model, param_grid, cv=5, n_jobs=-1, verbose=1)

grid_search.fit(x_train_scaled, y_train)

Fitting 5 folds for each of 24 candidates, totalling 120 fits


In [37]:
# Take the best estimator found by the grid search
best_model = grid_search.best_estimator_

# Predict the class labels for the scaled test features with it
y_pred = best_model.predict(x_test_scaled)

# Display the predicted labels
y_pred

array([0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1,
       1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,

In [38]:
# Accuracy of the tuned model's predictions on the held-out test set.
# The import is kept local so the function is available even if an earlier
# cell has rebound the name `accuracy_score` to a number.
from sklearn.metrics import accuracy_score

# Store the score under its own name so the function itself is not overwritten
# and this cell can be re-run safely.
tuned_accuracy = accuracy_score(y_test, y_pred)

tuned_accuracy

0.9146666666666666