In [12]:
import logging , os
import numpy as np
import pandas as pd


In [16]:
# Load the preprocessed dataset that the log-transform step starts from.
data_path = "C:/Users/User/Desktop/AI_Projects/Project_05/Data/Preprocessed_data/preprocessed_top_5.csv"
try:
    df = pd.read_csv(data_path)
    logging.info("Data called")
except FileNotFoundError:
    # Only a genuinely missing file is reported as "not found".
    logging.error(f"Dataset not found: {data_path}")
    # Re-raise so later cells do not run against an undefined `df`.
    raise
except Exception as e:
    # Parse, permission or encoding problems are different failures; record the real cause.
    logging.error(f"Dataset could not be loaded from {data_path}: {e}")
    raise

In [14]:
# Configure file logging for the notebook: every message at INFO or above is
# appended to <log_folder>/feature_engineering.log.
log_folder = "C:/Users/User/Desktop/AI_Projects/Project_05/Log"
os.makedirs(log_folder, exist_ok=True)

log_file = os.path.join(log_folder, "feature_engineering.log")
# NOTE(review): numexpr presumably reads NUMEXPR_MAX_THREADS when it is first
# imported; pandas is already imported above, so this may have no effect here.
# Confirm, and set it before the imports if the limit matters.
os.environ["NUMEXPR_MAX_THREADS"] = "8"


# force=True (Python 3.8+) replaces any handler already attached to the root
# logger. Without it this call is silently ignored when the root logger already
# has a handler -- which happens when any logging.info()/error() call runs before
# this cell, as the data-loading cell above does on a top-to-bottom run.
logging.basicConfig(
    filename=log_file,
    filemode='a',
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    force=True,
)
logging.info("Log tizimi ishga tushdi.")


In [15]:
def log_transform(df, threshold=0.5):
    """Apply ``np.log1p`` to the right-skewed numeric columns of ``df``.

    Skewness is computed for numeric columns only. Every column whose skewness
    is at least ``threshold`` and whose values are all non-negative is replaced
    by ``log1p`` of its values. Skewed columns that contain negative or missing
    values are left unchanged and a warning is logged.

    Args:
        df: Input DataFrame. It is not modified; the transform is applied to a copy.
        threshold: Minimum skewness for a column to be transformed.

    Returns:
        A new DataFrame with the selected columns transformed, or ``None`` if an
        unexpected error occurred (the error is logged with its traceback).
    """
    try:
        logging.info("Log transform started .")

        # numeric_only=True: skewness is undefined for text columns and pandas >= 2
        # raises on them instead of silently skipping them.
        skewness = df.skew(numeric_only=True)
        logging.info("Skewness hisoblandi.")

        features_log = skewness[skewness >= threshold].index.tolist()
        logging.info(f"Threshold {threshold} dan katta skewnessga ega ustunlar: {features_log}")

        # Work on a copy so the caller's frame keeps its raw values.
        result = df.copy()
        for col in features_log:
            # log1p(0) == 0 is well defined, so zeros are allowed; only negative
            # values (and NaN, which fails the comparison) block the transform.
            if (result[col] >= 0).all():
                result[col] = np.log1p(result[col])
                logging.info(f"{col} ustunga log transform qo‘llandi.")
            else:
                logging.warning(f"{col} ustunida manfiy yoki NaN qiymatlar borligi sababli log qo‘llanmadi.")

        logging.info("Log transform muvaffaqiyatli yakunlandi.")
        return result

    except Exception as e:
        # Keep the original "log and return None" contract, but make it explicit
        # and record the traceback so the failure can be diagnosed from the log.
        logging.exception(f"Xatolik yuz berdi: {e}")
        return None

In [17]:
def save_dataframe(df, out_folder, file_name):
    """Write ``df`` to ``<out_folder>/<file_name>`` as CSV, without the index.

    The output folder is created if it does not exist. Failures are not raised:
    they are logged at ERROR level together with the traceback.

    Args:
        df: DataFrame to persist.
        out_folder: Directory that receives the file.
        file_name: Name of the CSV file inside ``out_folder``.

    Returns:
        None.
    """
    try:
        os.makedirs(out_folder, exist_ok=True)
        output_path = os.path.join(out_folder, file_name)
        df.to_csv(output_path, index=False)
        # Generic wording: this helper also saves the filtered and selected frames.
        logging.info(f"Data saved to --> {output_path}")
    except Exception as e:
        # A failed save is an error, not an informational event; include the cause.
        logging.exception(f"Xatolik yuz berdi: {e}")

In [18]:
# Apply the log transform before saving: the output file is named
# "log_transform.csv", so it must hold the transformed frame, not the raw one.
df_log = log_transform(df)
if df_log is None:
    # log_transform logs its failure and returns None; do not write a misleading file.
    raise RuntimeError("log_transform failed; see the feature_engineering log for details")
df = df_log
save_dataframe(df,"C:/Users/User/Desktop/AI_Projects/Project_05/Data/Engineered_data","log_transform.csv")

# Filter method

In [30]:
# Reload the preprocessed dataset so the correlation filter starts from the raw
# preprocessed columns rather than from the previous section's frame.
data_path = "C:/Users/User/Desktop/AI_Projects/Project_05/Data/Preprocessed_data/preprocessed_top_5.csv"
try:
    df = pd.read_csv(data_path)
    logging.info("Data called")
except FileNotFoundError:
    # Only a genuinely missing file is reported as "not found".
    logging.error(f"Dataset not found: {data_path}")
    # Re-raise: continuing would silently reuse the `df` left by the previous section.
    raise
except Exception as e:
    # Parse, permission or encoding problems are different failures; record the real cause.
    logging.error(f"Dataset could not be loaded from {data_path}: {e}")
    raise

In [31]:
# Filter method: drop one column out of every pair whose absolute correlation
# exceeds CORR_THRESHOLD. Only the upper triangle of the matrix is inspected, so
# for each correlated pair the column that appears later in `df` is dropped.
CORR_THRESHOLD = 0.7  # single source of truth for both the filter and the log message
try:
    logging.info("Corralation ustunlarni tahlil qilish boshlandi.")

    corr_matrix = df.corr().abs()
    print(corr_matrix)
    logging.info("Corralation hisoblandi.")

    # Mask the diagonal and lower triangle so each pair is considered once.
    upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
    print(upper)
    logging.info("Matritsa yaratildi.")

    to_drop = [col for col in upper.columns if (upper[col] > CORR_THRESHOLD).any()]
    print(to_drop)
    # The message previously reported 0.8 while the filter used 0.7.
    logging.info(f"{CORR_THRESHOLD} dan yuqori corralation: {to_drop}")

    df_filtered = df.drop(columns=to_drop)
    logging.info(f"Corr ustunlar tashlab yuborildi. Original shape: {df.shape}, Yangi shape: {df_filtered.shape}")

    print("\nOriginal shape:", df.shape)
    print("Shape after dropping correlated features:", df_filtered.shape)

except Exception as e:
    logging.error(f"Korrelyatsiya asosida ustunlarni tashlash jarayonida xatolik yuz berdi: {e}")
    # Re-raise: the following cells read `df_filtered`, which would otherwise be
    # undefined or left over from an earlier run.
    raise

                        Club  Squad size  Average age  Foreigners  \
Club                1.000000    0.002461     0.005121    0.044094   
Squad size          0.002461    1.000000     0.338855    0.544159   
Average age         0.005121    0.338855     1.000000    0.296619   
Foreigners          0.044094    0.544159     0.296619    1.000000   
Market value        0.040036    0.041581     0.144779    0.212106   
Total market value  0.092220    0.059275     0.057097    0.086732   
League              0.021889    0.464898     0.039116    0.345249   
Season              0.011475    0.093887     0.092363    0.232314   
Category            0.007837    0.019903     0.175231    0.286763   

                    Market value  Total market value    League    Season  \
Club                    0.040036            0.092220  0.021889  0.011475   
Squad size              0.041581            0.059275  0.464898  0.093887   
Average age             0.144779            0.057097  0.039116  0.092363   
Forei

In [32]:
# Continue with the correlation-filtered frame produced by the cell above.
df=df_filtered

In [33]:
# Persist the current `df` as filtered.csv in the Engineered_data folder.
save_dataframe(df,"C:/Users/User/Desktop/AI_Projects/Project_05/Data/Engineered_data","filtered.csv")

# Wrapper method + Decision Tree

In [45]:
# Reload the preprocessed dataset so the decision-tree wrapper selection starts
# from the full set of preprocessed columns.
data_path = "C:/Users/User/Desktop/AI_Projects/Project_05/Data/Preprocessed_data/preprocessed_top_5.csv"
try:
    df = pd.read_csv(data_path)
    logging.info("Data called")
except FileNotFoundError:
    # Only a genuinely missing file is reported as "not found".
    logging.error(f"Dataset not found: {data_path}")
    # Re-raise: continuing would silently reuse the `df` left by the previous section.
    raise
except Exception as e:
    # Parse, permission or encoding problems are different failures; record the real cause.
    logging.error(f"Dataset could not be loaded from {data_path}: {e}")
    raise

In [46]:
from sklearn.feature_selection import RFE
from sklearn.tree import DecisionTreeClassifier

# Wrapper feature selection: recursive feature elimination (RFE) around a
# decision tree keeps the 5 predictors that best explain "Category".
try:
    logging.info("dt training with wrapped feature engineering")
    # 'Market value' is excluded from the predictors -- presumably because
    # "Category" is derived from it; TODO confirm.
    x = df.drop(['Market value', "Category"], axis=1)
    y = df["Category"]
    logging.info("target va dataset olindi")
    # Fixed seed so the selected feature set is reproducible across runs.
    model = DecisionTreeClassifier(random_state=42)
    rfe = RFE(model, n_features_to_select=5)
    rfe.fit(x, y)
    selected_cols = x.columns[rfe.support_].tolist()
    print("Selected features:", selected_cols)
    df_selected = df[selected_cols + ['Market value', "Category"]]
    logging.info("Ustunlar tanlab olindi")
except Exception as e:
    # Log the failure as an error, with its cause.
    logging.error(f"xatolik ro'y berdi: {e}")
    # Re-raise: the following cells read `df_selected`, which would otherwise be
    # undefined or left over from an earlier run.
    raise



Selected features: ['Club', 'Squad size', 'Foreigners', 'Total market value', 'Season']


In [47]:
# Continue with the frame restricted to the RFE-selected columns from the cell above.
df=df_selected

In [48]:
# Persist the current `df` as wrapped_d_tree.csv in the Engineered_data folder.
save_dataframe(df,"C:/Users/User/Desktop/AI_Projects/Project_05/Data/Engineered_data","wrapped_d_tree.csv")

# Wrapper method + Random Forest

In [56]:
# Reload the preprocessed dataset so the random-forest wrapper selection starts
# from the full set of preprocessed columns.
data_path = "C:/Users/User/Desktop/AI_Projects/Project_05/Data/Preprocessed_data/preprocessed_top_5.csv"
try:
    df = pd.read_csv(data_path)
    logging.info("Data called")
except FileNotFoundError:
    # Only a genuinely missing file is reported as "not found".
    logging.error(f"Dataset not found: {data_path}")
    # Re-raise: continuing would silently reuse the `df` left by the previous section.
    raise
except Exception as e:
    # Parse, permission or encoding problems are different failures; record the real cause.
    logging.error(f"Dataset could not be loaded from {data_path}: {e}")
    raise

In [58]:
from sklearn.feature_selection import RFE
from sklearn.ensemble import RandomForestClassifier

# Wrapper feature selection: recursive feature elimination (RFE) around a
# random forest keeps the 5 predictors that best explain "Category".
try:
    logging.info(" RF training with wrapped feature engineering")
    # 'Market value' is excluded from the predictors -- presumably because
    # "Category" is derived from it; TODO confirm.
    x = df.drop(['Market value', "Category"], axis=1)
    y = df["Category"]
    logging.info("target va dataset olindi")
    # RFE needs an estimator *instance*. Passing the class itself made fit() fail,
    # and the swallowed error let the previous section's `df_selected` be saved
    # under the random-forest file name. Seeded for reproducibility.
    model = RandomForestClassifier(random_state=42)
    rfe = RFE(model, n_features_to_select=5)
    rfe.fit(x, y)
    selected_cols = x.columns[rfe.support_].tolist()
    print("Selected features:", selected_cols)
    df_selected = df[selected_cols + ['Market value', "Category"]]
    logging.info("Ustunlar tanlab olindi")
except Exception as e:
    # Log the failure as an error, with its cause.
    logging.error(f"xatolik ro'y berdi: {e}")
    # Re-raise: the following cells read `df_selected`, which would otherwise be
    # undefined or left over from an earlier section.
    raise

In [59]:
# Continue with `df_selected`, which the RFE cell above assigns when it succeeds.
df=df_selected

In [60]:
# Persist the current `df` as wrapped_random_forest.csv in the Engineered_data folder.
save_dataframe(df,"C:/Users/User/Desktop/AI_Projects/Project_05/Data/Engineered_data","wrapped_random_forest.csv")

 # Embedded + Decision Tree

In [63]:
# Reload the preprocessed dataset so the embedded (feature-importance) selection
# starts from the full set of preprocessed columns.
data_path = "C:/Users/User/Desktop/AI_Projects/Project_05/Data/Preprocessed_data/preprocessed_top_5.csv"
try:
    df = pd.read_csv(data_path)
    logging.info("Data called")
except FileNotFoundError:
    # Only a genuinely missing file is reported as "not found".
    logging.error(f"Dataset not found: {data_path}")
    # Re-raise: continuing would silently reuse the `df` left by the previous section.
    raise
except Exception as e:
    # Parse, permission or encoding problems are different failures; record the real cause.
    logging.error(f"Dataset could not be loaded from {data_path}: {e}")
    raise

In [64]:
from sklearn.tree import DecisionTreeRegressor

# Embedded feature selection: fit a decision tree on all predictors and keep the
# 5 features with the highest impurity-based importance.
try:
    # This section trains a decision tree; the message previously said "RF".
    logging.info(" DT training with Embedded feature engineering")
    x = df.drop(['Market value', "Category"], axis=1)
    y = df["Category"]
    logging.info("target va dataset olindi")
    # NOTE(review): the wrapper sections model "Category" with classifiers while
    # this one uses a regressor -- confirm that treating it as numeric is intended.
    dt = DecisionTreeRegressor(random_state=42)
    dt.fit(x, y)
    importance = pd.DataFrame({
        "Feature": x.columns,
        "Importance": dt.feature_importances_
    }).sort_values(by="Importance", ascending=False)

    print(importance)

    top_features = importance["Feature"].head(5).tolist()
    print("Tanlangan ustunlar:", top_features)
    # NOTE(review): unlike the wrapper sections, 'Market value' is not kept in the
    # output frame here -- confirm this is intentional.
    df_selected = df[top_features + ['Category']]
    logging.info("Feature selection is finished")
except Exception as e:
    # Log the failure as an error, with its cause.
    logging.error(f"Xatolik yuz berdi: {e}")
    # Re-raise: the following cell reads `df_selected`, which would otherwise be
    # undefined or left over from an earlier section.
    raise

              Feature  Importance
4  Total market value    0.372969
6              Season    0.196426
3          Foreigners    0.117908
0                Club    0.109794
1          Squad size    0.089564
5              League    0.065955
2         Average age    0.047384
Tanlangan ustunlar: ['Total market value', 'Season', 'Foreigners', 'Club', 'Squad size']


In [65]:
# Continue with the frame restricted to the top-importance features from the
# cell above and persist it as embedded_DT.csv in the Engineered_data folder.
df=df_selected
save_dataframe(df,"C:/Users/User/Desktop/AI_Projects/Project_05/Data/Engineered_data","embedded_DT.csv")