# Import libraries

In [1]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt

# Read csv

## Match result

In [2]:
# Load the match results table, print its (rows, columns) shape,
# and show the first record as a preview.
match_result = pd.read_csv(filepath_or_buffer='eda_data/match_result.csv')
print(match_result.shape)
match_result.iloc[:1]

## Match result statistics

In [3]:
# Load the result-percentage table, print its (rows, columns) shape,
# and show the first record as a preview.
result_stats = pd.read_csv(filepath_or_buffer='eda_data/result_percentage.csv')
print(result_stats.shape)
result_stats.iloc[:1]

## Shooting stats

In [4]:
# Load the shooting statistics, print the (rows, columns) shape,
# and show the first record as a preview.
shooting = pd.read_csv(filepath_or_buffer='eda_data/df_shooting.csv')
print(shooting.shape)
shooting.iloc[:1]

In [5]:
# Load the companion "explained" table for the shooting statistics,
# print its (rows, columns) shape, and display it in full.
shooting_explain = pd.read_csv(filepath_or_buffer='eda_data/df_shooting_explained.csv')
print(shooting_explain.shape)
shooting_explain

## Defensive Actions

In [6]:
# Load the defensive-actions statistics, print the (rows, columns) shape,
# and show the first record as a preview.
defensive_actions = pd.read_csv(filepath_or_buffer='eda_data/df_defensive_actions.csv')
print(defensive_actions.shape)
defensive_actions.iloc[:1]

In [7]:
# Load the companion "explained" table for the defensive-actions statistics,
# print its (rows, columns) shape, and display it in full.
defensive_actions_explain = pd.read_csv(filepath_or_buffer='eda_data/df_defensive_actions_explained.csv')
print(defensive_actions_explain.shape)
defensive_actions_explain

## Goal Keeping Stats

In [8]:
# Load the goalkeeping statistics, print the (rows, columns) shape,
# and show the first record as a preview.
goal_keeping = pd.read_csv(filepath_or_buffer='eda_data/df_goal_keeping.csv')
print(goal_keeping.shape)
goal_keeping.iloc[:1]

In [9]:
# Load the companion "explained" table for the goalkeeping statistics,
# print its (rows, columns) shape, and display it in full.
goal_keeping_explain = pd.read_csv(filepath_or_buffer='eda_data/df_goal_keeping_explained.csv')
print(goal_keeping_explain.shape)
goal_keeping_explain

## Goal Shots Creation Stats

In [10]:
# Load the goal/shot-creation statistics, print the (rows, columns) shape,
# and show the first record as a preview.
goal_shot_creation = pd.read_csv(filepath_or_buffer='eda_data/df_goal_shot_creation.csv')
print(goal_shot_creation.shape)
goal_shot_creation.iloc[:1]

In [11]:
# Load the companion "explained" table for the goal/shot-creation statistics,
# print its (rows, columns) shape, and display it in full.
goal_shot_creation_explain = pd.read_csv(filepath_or_buffer='eda_data/df_goal_shot_creation_explained.csv')
print(goal_shot_creation_explain.shape)
goal_shot_creation_explain

# EDA1: Shooting

## Create shoot_to_result DataFrame

In [12]:
# Place match results and shooting statistics side by side (column-wise concat,
# rows aligned on the index).
# NOTE(review): assumes both CSVs describe the same matches in the same row order — confirm.
shoot_to_result = pd.concat([match_result, shooting], axis=1)

# Encode the match outcome in 'Win': 1 when GF > GA, 0 when GF == GA, -1 otherwise.
# np.select evaluates whole columns at once instead of calling a Python lambda per row;
# rows where neither condition holds (including missing scores) get the default -1,
# exactly as the row-wise version did.
shoot_to_result['Win'] = np.select(
    [
        shoot_to_result['GF'] > shoot_to_result['GA'],
        shoot_to_result['GF'] == shoot_to_result['GA'],
    ],
    [1, 0],
    default=-1,
)

# Preview the first row of the combined table
shoot_to_result.head(1)

## Correlation

In [13]:
# Shooting statistics whose relationship with the match outcome is examined
shooting_columns = [
    'Standard__Gls',
    'Standard__Sh',
    'Standard__SoT',
    'Standard__SoT%',
    'Standard__G/Sh',
    'Standard__G/SoT',
    'Standard__Dist',
    'Standard__FK',
    'Standard__PK',
    'Standard__PKatt',
]

# Working subset: the shooting statistics plus both targets ('GF' and 'Win')
temp_df = shoot_to_result.loc[:, [*shooting_columns, 'GF', 'Win']]

# Pairwise correlation matrix of the shooting statistics together with 'GF'
# (the 'GF' row/column is last because 'GF' is appended last)
correlation_shooting_GF = temp_df.loc[:, [*shooting_columns, 'GF']].corr()

# Pairwise correlation matrix of the shooting statistics together with 'Win'
correlation_shooting_Win = temp_df.loc[:, [*shooting_columns, 'Win']].corr()


### Shooting to goals (GF)

In [14]:
# Positionally trim the matrix: skip the first row ('Standard__Gls', the first
# entry of shooting_columns) and the last row ('GF' correlated with itself).
correlation_shooting_GF_cleaned = correlation_shooting_GF.iloc[1:-1]

# Correlation of each remaining shooting statistic with 'GF'
print(correlation_shooting_GF_cleaned.loc[:, 'GF'])


In [15]:
# Strong features: absolute correlation with 'GF' of at least 0.4
good_shooting_features = correlation_shooting_GF_cleaned.loc[
    correlation_shooting_GF_cleaned['GF'].abs() >= 0.4, 'GF'
]

# Potential features: absolute correlation with 'GF' in [0.2, 0.4)
potential_shooting_features = correlation_shooting_GF_cleaned.loc[
    (correlation_shooting_GF_cleaned['GF'].abs() >= 0.2)
    & (correlation_shooting_GF_cleaned['GF'].abs() < 0.4),
    'GF',
]

# Save both selections. The feature names are the Series index, so the index
# must be written too; with index=False the files contained only anonymous
# correlation values. The value column is still called 'GF'.
good_shooting_features.to_csv('eda_data/selected/good_shooting_features.csv', index_label='Feature')
potential_shooting_features.to_csv('eda_data/selected/potential_shooting_features.csv', index_label='Feature')

print("Files saved: good_shooting_features.csv and potential_shooting_features.csv")

In [16]:
# Display the shooting features selected as strongly correlated with 'GF' (|r| >= 0.4)
good_shooting_features

In [17]:
# Display the shooting features selected as potentially useful for 'GF' (0.2 <= |r| < 0.4)
potential_shooting_features

### Shooting to win

In [18]:
# Remove only the trailing 'Win' row ('Win' correlated with itself); every
# shooting statistic, including the first one, stays in the table.
correlation_shooting_Win_cleaned = correlation_shooting_Win.iloc[:-1]

# Correlation of each shooting statistic with 'Win'
print(correlation_shooting_Win_cleaned.loc[:, 'Win'])

In [19]:
# Strong features: absolute correlation with 'Win' of at least 0.4
good_shooting_features_win = correlation_shooting_Win_cleaned.loc[
    correlation_shooting_Win_cleaned['Win'].abs() >= 0.4, 'Win'
]

# Potential features: absolute correlation with 'Win' in [0.2, 0.4)
potential_shooting_features_win = correlation_shooting_Win_cleaned.loc[
    (correlation_shooting_Win_cleaned['Win'].abs() >= 0.2)
    & (correlation_shooting_Win_cleaned['Win'].abs() < 0.4),
    'Win',
]

# Save both selections. The feature names are the Series index, so the index
# must be written too; with index=False the files contained only anonymous
# correlation values. The value column is still called 'Win'.
good_shooting_features_win.to_csv('eda_data/selected/good_shooting_features_to_win.csv', index_label='Feature')
potential_shooting_features_win.to_csv('eda_data/selected/potential_shooting_features_to_win.csv', index_label='Feature')

# Report the file names that were actually written
print("Files saved: good_shooting_features_to_win.csv and potential_shooting_features_to_win.csv")


In [20]:
# Display the shooting features selected as strongly correlated with 'Win' (|r| >= 0.4)
good_shooting_features_win

In [21]:
# Display the shooting features selected as potentially useful for 'Win' (0.2 <= |r| < 0.4)
potential_shooting_features_win

# EDA 2: Goal Keeping

In [22]:
# Add the outcome column 'Win' to match_result itself so later sections can reuse it:
# 1 when GF > GA, 0 when GF == GA, -1 otherwise.
# np.select works on whole columns instead of a per-row Python lambda; rows where
# neither condition holds (including missing scores) get the default -1, as before.
match_result['Win'] = np.select(
    [
        match_result['GF'] > match_result['GA'],
        match_result['GF'] == match_result['GA'],
    ],
    [1, 0],
    default=-1,
)

In [23]:
# Remove the 'Unnamed: 0' column from the goalkeeping table (presumably a saved
# row index — confirm). errors='ignore' keeps this cell re-runnable: once the
# column is gone, a second run would otherwise raise KeyError.
goal_keeping = goal_keeping.drop(columns='Unnamed: 0', errors='ignore')

# Place match results and goalkeeping statistics side by side
# (column-wise concat, rows aligned on the index)
GK_to_result = pd.concat([match_result, goal_keeping], axis=1)

# Preview the first row of the combined table
GK_to_result.head(1)

## Correlation

In [24]:
# Goalkeeping statistics whose relationship with the match outcome is examined
GK_columns = [
    'Performance__SoTA',
    'Performance__GA',
    'Performance__Saves',
    'Performance__Save%',
    'Performance__CS',
    'Performance__PSxG',
    'Performance__PSxG+/-',
    'Penalty Kicks__PKatt',
    'Penalty Kicks__PKA',
    'Penalty Kicks__PKsv',
    'Penalty Kicks__PKm',
    'Launched__Cmp',
    'Launched__Att',
    'Launched__Cmp%',
    'Passes__Att (GK)',
    'Passes__Thr',
    'Passes__Launch%',
    'Passes__AvgLen',
    'Goal Kicks__Att',
    'Goal Kicks__Launch%',
    'Goal Kicks__AvgLen',
    'Crosses__Opp',
    'Crosses__Stp',
    'Crosses__Stp%',
    'Sweeper__#OPA',
]

# Working subset: the goalkeeping statistics plus both targets ('GA' and 'Win')
temp_df = GK_to_result.loc[:, [*GK_columns, 'GA', 'Win']]

# Pairwise correlation matrix of the goalkeeping statistics together with 'GA'
correlation_GK_GA = temp_df.loc[:, [*GK_columns, 'GA']].corr()

# Pairwise correlation matrix of the goalkeeping statistics together with 'Win'
correlation_GK_Win = temp_df.loc[:, [*GK_columns, 'Win']].corr()

### GK to Goal against

In [25]:
# Trim the matrix in one step: cut the trailing 'GA' row ('GA' correlated with
# itself), then remove the row at position 1 — 'Performance__GA', the second
# entry of GK_columns.
correlation_GK_GA_cleaned = correlation_GK_GA.iloc[:-1].drop(index=correlation_GK_GA.index[1])

# Correlation of each remaining goalkeeping statistic with 'GA'
print(correlation_GK_GA_cleaned.loc[:, 'GA'])


In [26]:

# Bar chart: one bar per goalkeeping feature, height = correlation with 'GA'
plt.figure(figsize=(10, 6))
ax = correlation_GK_GA_cleaned['GA'].plot.bar(color='skyblue', edgecolor='black')

# Title and axis labels
ax.set_title('Correlation of GK Features with GA', fontsize=16)
ax.set_xlabel('Features', fontsize=12)
ax.set_ylabel('Correlation', fontsize=12)

# Slant the feature names so they do not overlap; dashed horizontal guide lines
plt.xticks(rotation=45, ha='right', fontsize=10)
ax.grid(axis='y', linestyle='--', alpha=0.7)

# Fit everything inside the figure and render it
plt.tight_layout()
plt.show()


In [27]:
# Strong features: absolute correlation with 'GA' of at least 0.4
good_GK_features = correlation_GK_GA_cleaned.loc[
    correlation_GK_GA_cleaned['GA'].abs() >= 0.4, 'GA'
]

# Potential features: absolute correlation with 'GA' in [0.15, 0.4)
# (the lower bound used here is 0.15, not 0.2)
potential_GK_features = correlation_GK_GA_cleaned.loc[
    (correlation_GK_GA_cleaned['GA'].abs() >= 0.15)
    & (correlation_GK_GA_cleaned['GA'].abs() < 0.4),
    'GA',
]

# Save both selections. The feature names are the Series index, so the index
# must be written too; with index=False the files contained only anonymous
# correlation values. The value column is still called 'GA'.
good_GK_features.to_csv('eda_data/selected/good_GK_features.csv', index_label='Feature')
potential_GK_features.to_csv('eda_data/selected/potential_GK_features.csv', index_label='Feature')

print("Files saved: good_GK_features.csv and potential_GK_features.csv")


In [28]:
# Display the goalkeeping features selected as strongly correlated with 'GA' (|r| >= 0.4)
good_GK_features

In [29]:
# Display the goalkeeping features selected as potentially useful for 'GA' (0.15 <= |r| < 0.4)
potential_GK_features

### GK to Win

In [30]:
# Remove only the trailing 'Win' row ('Win' correlated with itself); all
# goalkeeping statistics stay in the table.
correlation_GK_Win_cleaned = correlation_GK_Win.iloc[:-1]

# Correlation of each goalkeeping statistic with 'Win'
print(correlation_GK_Win_cleaned.loc[:, 'Win'])




In [31]:
# Bar chart: one bar per goalkeeping feature, height = correlation with 'Win'
plt.figure(figsize=(10, 6))
ax = correlation_GK_Win_cleaned['Win'].plot.bar(color='skyblue', edgecolor='black')

# Title and axis labels
ax.set_title('Correlation of GK Features with Win', fontsize=16)
ax.set_xlabel('Features', fontsize=12)
ax.set_ylabel('Correlation', fontsize=12)

# Slant the feature names so they do not overlap; dashed horizontal guide lines
plt.xticks(rotation=45, ha='right', fontsize=10)
ax.grid(axis='y', linestyle='--', alpha=0.7)

# Fit everything inside the figure and render it
plt.tight_layout()
plt.show()




In [32]:
# Strong features: absolute correlation with 'Win' of at least 0.4.
# The result is bound directly to the *_Win names so that the GA-based
# good_GK_features / potential_GK_features selections are not overwritten.
good_GK_features_Win = correlation_GK_Win_cleaned.loc[
    correlation_GK_Win_cleaned['Win'].abs() >= 0.4, 'Win'
]

# Potential features: absolute correlation with 'Win' in [0.15, 0.4)
# (the lower bound used here is 0.15, not 0.2)
potential_GK_features_Win = correlation_GK_Win_cleaned.loc[
    (correlation_GK_Win_cleaned['Win'].abs() >= 0.15)
    & (correlation_GK_Win_cleaned['Win'].abs() < 0.4),
    'Win',
]

# Save both selections. The feature names are the Series index, so the index
# must be written too; with index=False the files contained only anonymous
# correlation values. The value column is still called 'Win'.
good_GK_features_Win.to_csv('eda_data/selected/good_GK_features_Win.csv', index_label='Feature')
potential_GK_features_Win.to_csv('eda_data/selected/potential_GK_features_Win.csv', index_label='Feature')

print("Files saved: good_GK_features_Win.csv and potential_GK_features_Win.csv")

In [33]:
# Display the goalkeeping features selected as strongly correlated with 'Win' (|r| >= 0.4)
good_GK_features_Win

In [34]:
# Display the goalkeeping features selected as potentially useful for 'Win' (0.15 <= |r| < 0.4)
potential_GK_features_Win

##

# EDA 3: Goal Shot Creation

In [35]:
# Place match results and goal/shot-creation statistics side by side
# (column-wise concat, rows aligned on the index).
# NOTE(review): assumes both tables list the same matches in the same row order — confirm.
goal_shot_creation_to_result = pd.concat([match_result, goal_shot_creation], axis=1)

# (Re)compute the outcome column 'Win': 1 when GF > GA, 0 when GF == GA, -1 otherwise.
# np.select works on whole columns instead of a per-row Python lambda; rows where
# neither condition holds (including missing scores) get the default -1, as before.
goal_shot_creation_to_result['Win'] = np.select(
    [
        goal_shot_creation_to_result['GF'] > goal_shot_creation_to_result['GA'],
        goal_shot_creation_to_result['GF'] == goal_shot_creation_to_result['GA'],
    ],
    [1, 0],
    default=-1,
)

# Preview the first row of the combined table
goal_shot_creation_to_result.head(1)






## Correlation

In [36]:
# Shot-creating (SCA) and goal-creating (GCA) statistics to be examined
goal_shot_creation_cols = [
    'SCA Types__SCA',
    'SCA Types__PassLive',
    'SCA Types__PassDead',
    'SCA Types__TO',
    'SCA Types__Sh',
    'SCA Types__Fld',
    'SCA Types__Def',
    'GCA Types__GCA',
    'GCA Types__PassLive',
    'GCA Types__PassDead',
    'GCA Types__TO',
    'GCA Types__Sh',
    'GCA Types__Fld',
    'GCA Types__Def',
]

# Working subset: the creation statistics plus both targets ('GF' and 'Win')
temp_df = goal_shot_creation_to_result.loc[:, [*goal_shot_creation_cols, 'GF', 'Win']]

# Pairwise correlation matrix of the creation statistics together with 'GF'
correlation_goal_shot_GF = temp_df.loc[:, [*goal_shot_creation_cols, 'GF']].corr()

# Pairwise correlation matrix of the creation statistics together with 'Win'
correlation_goal_shot_Win = temp_df.loc[:, [*goal_shot_creation_cols, 'Win']].corr()

### Goal shot creation to GF

In [37]:
# Remove the trailing 'GF' row ('GF' correlated with itself) so that only
# the creation statistics remain as rows.
correlation_goal_shot_GF_cleaned = correlation_goal_shot_GF.iloc[:-1]

# Correlation of each creation statistic with 'GF'
print(correlation_goal_shot_GF_cleaned.loc[:, 'GF'])

In [38]:
# matplotlib.pyplot is already imported as plt in the notebook's import cell,
# so it is not imported again here.

# Correlation of each creation statistic with 'GF'
correlation_values = correlation_goal_shot_GF_cleaned['GF']

# Bar chart: one bar per feature, height = correlation with 'GF'
plt.figure(figsize=(10, 6))
correlation_values.plot(kind='bar', color='skyblue', edgecolor='black')

# Title and axis labels
plt.title('Correlation of Goal Shot Creation Features with GF', fontsize=16)
plt.xlabel('Goal Shot Creation Features', fontsize=12)
plt.ylabel('Correlation with GF', fontsize=12)

# Slant the feature names so they do not overlap; dashed horizontal guide lines
plt.xticks(rotation=45, ha='right', fontsize=10)
plt.grid(axis='y', linestyle='--', alpha=0.7)

# Fit everything inside the figure and render it
plt.tight_layout()
plt.show()


In [39]:

# Strong features: absolute correlation with 'GF' of at least 0.4
good_goal_shot_features = correlation_goal_shot_GF_cleaned.loc[
    correlation_goal_shot_GF_cleaned['GF'].abs() >= 0.4, 'GF'
]

# Potential features: absolute correlation with 'GF' in [0.2, 0.4)
potential_goal_shot_features = correlation_goal_shot_GF_cleaned.loc[
    (correlation_goal_shot_GF_cleaned['GF'].abs() >= 0.2)
    & (correlation_goal_shot_GF_cleaned['GF'].abs() < 0.4),
    'GF',
]

# Save both selections. The feature names are the Series index, so the index
# must be written too; with index=False the files contained only anonymous
# correlation values. The value column is still called 'GF'.
good_goal_shot_features.to_csv('eda_data/selected/good_goal_shot_features.csv', index_label='Feature')
potential_goal_shot_features.to_csv('eda_data/selected/potential_goal_shot_features.csv', index_label='Feature')

print("Files saved: good_goal_shot_features.csv and potential_goal_shot_features.csv")

In [40]:
# Display the creation features selected as strongly correlated with 'GF' (|r| >= 0.4)
good_goal_shot_features

In [41]:
# Display the creation features selected as potentially useful for 'GF' (0.2 <= |r| < 0.4)
potential_goal_shot_features

### Goal shot creation to Win

In [42]:
# Remove only the trailing 'Win' row ('Win' correlated with itself); every
# creation statistic, including the first one, stays in the table.
correlation_goal_shot_Win_cleaned = correlation_goal_shot_Win.iloc[:-1]

# Correlation of each creation statistic with 'Win'
print(correlation_goal_shot_Win_cleaned.loc[:, 'Win'])

In [43]:
# matplotlib.pyplot is already imported as plt in the notebook's import cell,
# so it is not imported again here.

# Correlation of each creation statistic with 'Win'
correlation_values = correlation_goal_shot_Win_cleaned['Win']

# Bar chart: one bar per feature, height = correlation with 'Win'
plt.figure(figsize=(10, 6))
correlation_values.plot(kind='bar', color='lightcoral', edgecolor='black')

# Title and axis labels
plt.title('Correlation of Goal Shot Creation Features with Win', fontsize=16)
plt.xlabel('Goal Shot Creation Features', fontsize=12)
plt.ylabel('Correlation with Win', fontsize=12)

# Slant the feature names so they do not overlap; dashed horizontal guide lines
plt.xticks(rotation=45, ha='right', fontsize=10)
plt.grid(axis='y', linestyle='--', alpha=0.7)

# Fit everything inside the figure and render it
plt.tight_layout()
plt.show()


In [44]:


# Strong features: absolute correlation with 'Win' of at least 0.2
# (this section uses lower cut-offs than the 'GF' selection)
good_goal_shot_features_win = correlation_goal_shot_Win_cleaned.loc[
    correlation_goal_shot_Win_cleaned['Win'].abs() >= 0.2, 'Win'
]

# Potential features: absolute correlation with 'Win' in [0.1, 0.2)
potential_goal_shot_features_win = correlation_goal_shot_Win_cleaned.loc[
    (correlation_goal_shot_Win_cleaned['Win'].abs() >= 0.1)
    & (correlation_goal_shot_Win_cleaned['Win'].abs() < 0.2),
    'Win',
]

# Save both selections. The feature names are the Series index, so the index
# must be written too; with index=False the files contained only anonymous
# correlation values. The value column is still called 'Win'.
good_goal_shot_features_win.to_csv('eda_data/selected/good_goal_shot_features_win.csv', index_label='Feature')
potential_goal_shot_features_win.to_csv('eda_data/selected/potential_goal_shot_features_win.csv', index_label='Feature')

print("Files saved: good_goal_shot_features_win.csv and potential_goal_shot_features_win.csv")

In [45]:
# Display the creation features selected as strongly correlated with 'Win' (|r| >= 0.2)
good_goal_shot_features_win

In [46]:
# Display the creation features selected as potentially useful for 'Win' (0.1 <= |r| < 0.2)
potential_goal_shot_features_win

# EDA 4: Defensive actions

In [47]:
# Defensive-action statistics whose relationship with the match outcome is examined
defensive_actions_cols = [
    'Tackles__Tkl',
    'Tackles__TklW',
    'Tackles__Def 3rd',
    'Tackles__Mid 3rd',
    'Tackles__Att 3rd',
    'Challenges__Tkl',
    'Challenges__Att',
    'Challenges__Tkl%',
    'Challenges__Lost',
    'Blocks__Blocks',
    'Blocks__Sh',
    'Blocks__Pass',
    'Int',
    'Tkl+Int',
    'Clr',
    'Err',
]

# Place match results and defensive-action statistics side by side
# (column-wise concat, rows aligned on the index)
defensive_actions_to_result = pd.concat(
    [match_result, defensive_actions],
    axis='columns',
)

# Preview the first row of the combined table
defensive_actions_to_result.iloc[:1]

## Correlation

In [48]:
# Working subset: the defensive-action statistics plus both targets ('GA' and 'Win')
temp_df_defensive = defensive_actions_to_result.loc[:, [*defensive_actions_cols, 'GA', 'Win']]

# Pairwise correlation matrix of the defensive statistics together with 'GA'
correlation_defensive_GA = temp_df_defensive.loc[:, [*defensive_actions_cols, 'GA']].corr()

# Pairwise correlation matrix of the defensive statistics together with 'Win'
correlation_defensive_Win = temp_df_defensive.loc[:, [*defensive_actions_cols, 'Win']].corr()

### Defensive actions to GA

In [49]:
# Clean the GA correlation matrix: remove only the trailing 'GA' row
# ('GA' correlated with itself). No entry of defensive_actions_cols is the
# target itself, so every feature row is kept — dropping the row at position 1
# would silently discard 'Tackles__TklW'.
correlation_defensive_GA_cleaned = correlation_defensive_GA.iloc[:-1]

# Correlation of each defensive statistic with 'GA'
print(correlation_defensive_GA_cleaned['GA'])

In [50]:
# Bar chart: one bar per defensive feature, height = correlation with 'GA'
plt.figure(figsize=(10, 6))
ax = correlation_defensive_GA_cleaned['GA'].plot.bar(color='lightgreen', edgecolor='black')

# Title and axis labels
ax.set_title('Correlation of Defensive Actions Features with GA', fontsize=16)
ax.set_xlabel('Features', fontsize=12)
ax.set_ylabel('Correlation', fontsize=12)

# Slant the feature names so they do not overlap; dashed horizontal guide lines
plt.xticks(rotation=45, ha='right', fontsize=10)
ax.grid(axis='y', linestyle='--', alpha=0.7)

# Fit everything inside the figure and render it
plt.tight_layout()
plt.show()

In [51]:
# Selected features: absolute correlation with 'GA' of at least 0.1
# (the cut-off used for this section is 0.1, not 0.4)
good_defensive_features = correlation_defensive_GA_cleaned.loc[
    correlation_defensive_GA_cleaned['GA'].abs() >= 0.1, 'GA'
]

# Save the selection. The feature names are the Series index, so the index
# must be written too; with index=False the file contained only anonymous
# correlation values. The value column is still called 'GA'.
good_defensive_features.to_csv('eda_data/selected/good_defensive_features.csv', index_label='Feature')

print("Files saved: good_defensive_features.csv")

In [52]:
# Display the defensive features selected for 'GA' (|r| >= 0.1)
good_defensive_features

### Defensive actions to Win

In [53]:
# Pairwise correlation matrix of the defensive statistics together with 'Win'
correlation_defensive_Win = temp_df_defensive[defensive_actions_cols + ['Win']].corr()

# Clean the matrix: remove only the trailing 'Win' row ('Win' correlated with
# itself). No entry of defensive_actions_cols is the target itself, so every
# feature row is kept — dropping the row at position 1 would silently discard
# 'Tackles__TklW'.
correlation_defensive_Win_cleaned = correlation_defensive_Win.iloc[:-1]

# Correlation of each defensive statistic with 'Win'
print(correlation_defensive_Win_cleaned['Win'])

In [54]:
# Bar chart: one bar per defensive feature, height = correlation with 'Win'
plt.figure(figsize=(10, 6))
ax = correlation_defensive_Win_cleaned['Win'].plot.bar(color='lightcoral', edgecolor='black')

# Title and axis labels
ax.set_title('Correlation of Defensive Actions Features with Win', fontsize=16)
ax.set_xlabel('Features', fontsize=12)
ax.set_ylabel('Correlation', fontsize=12)

# Slant the feature names so they do not overlap; dashed horizontal guide lines
plt.xticks(rotation=45, ha='right', fontsize=10)
ax.grid(axis='y', linestyle='--', alpha=0.7)

# Fit everything inside the figure and render it
plt.tight_layout()
plt.show()


In [55]:
# Selected features: absolute correlation with 'Win' of at least 0.1
# (the cut-offs used for this section are 0.1 / 0.05, not 0.4 / 0.2)
good_defensive_features_win = correlation_defensive_Win_cleaned.loc[
    correlation_defensive_Win_cleaned['Win'].abs() >= 0.1, 'Win'
]

# Potential features: absolute correlation with 'Win' in [0.05, 0.1)
potential_defensive_features_win = correlation_defensive_Win_cleaned.loc[
    (correlation_defensive_Win_cleaned['Win'].abs() >= 0.05)
    & (correlation_defensive_Win_cleaned['Win'].abs() < 0.1),
    'Win',
]

# Save both selections. The feature names are the Series index, so the index
# must be written too; with index=False the files contained only anonymous
# correlation values. The value column is still called 'Win'.
good_defensive_features_win.to_csv('eda_data/selected/good_defensive_features_win.csv', index_label='Feature')
potential_defensive_features_win.to_csv('eda_data/selected/potential_defensive_features_win.csv', index_label='Feature')

print("Files saved: good_defensive_features_win.csv and potential_defensive_features_win.csv")

In [56]:
# Display the defensive features selected for 'Win' (|r| >= 0.1)
good_defensive_features_win

In [57]:
# Display the defensive features selected as potentially useful for 'Win' (0.05 <= |r| < 0.1)
potential_defensive_features_win

# Conclusion

In [59]:
# Selections to merge, in the order their feature names are appended
selected_feature_sets = [
    good_defensive_features_win,
    good_defensive_features,
    good_GK_features_Win,
    good_GK_features,
    good_goal_shot_features_win,
    good_goal_shot_features,
    good_shooting_features_win,
    good_shooting_features,
]

# The feature names are the index labels of each selection. Build the table in
# one step (rather than growing a DataFrame with repeated pd.concat calls) and
# keep each name once, in first-seen order, because the same feature can be
# picked by more than one criterion.
all_feature_names = pd.DataFrame(
    {
        'Feature_Name': [
            feature_name
            for selection in selected_feature_sets
            for feature_name in selection.index
        ]
    }
).drop_duplicates(ignore_index=True)

# Show the combined list of feature names
print(all_feature_names)

# Save the combined list to CSV
all_feature_names.to_csv('eda_data/selected/all_feature_names.csv', index=False)
