In [109]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.offline as pyo
import cufflinks as cf
cf.go_offline(connected=True)
from collections import Counter
import plotly.graph_objects as go

# from google.colab import drive

# FIFA 22 선수분석
# 값어치(value_eur)가 제일 높은 선수 20명 
PATH = "archive/"
FIFA_22 = pd.read_csv(PATH + 'players_22.csv', encoding='utf-8')
FIFA_22_value = FIFA_22.sort_values(by='value_eur', ascending=False).copy()
FIFA_22_value.head(20)

FIFA_22.shape
# 필요한 데이터만 추출
data = FIFA_22[['short_name','age','height_cm','weight_kg','nationality_name','club_name','overall','potential',
        'value_eur','wage_eur','player_positions','preferred_foot','international_reputation',
        'skill_moves', 'work_rate']]
data.isnull().sum() # 결측치 확인
data = data.dropna()   # 결측치 제거

# 오버롤 : 포지션 능력치의 평균치 / min, max
min(data.overall), max(data.overall)

# 국가별 선수 숫자
data['nationality_name'].value_counts()

# 모든 선수들의 주발
players_foot = data.groupby(['preferred_foot']).count()[['short_name']]
players_foot = players_foot.rename(columns={'short_name':'count'})

# 데이터 분석 및 시각화
players_foot = players_foot.reset_index()
players_foot
layout_preferred_foot = {
    'title':{
        'text':'<b>Preferred_foot</b>',
        'font':{
            'size':15,
            'color':'#37474F'
        },
        'x':0.5,
        'y':0.9
    },
    'showlegend' : True,
    'xaxis':{},
    'yaxis':{}
}

# players_foot.iplot(kind='pie', theme='white', labels='preferred_foot', values='count', layout=layout_preferred_foot)


# 선수 많은 상위 10개 국가
top10 = dict(Counter(data['nationality_name'].values).most_common(10))
top10_index = top10.keys()
top10_columns = top10.values()
top10_df = pd.DataFrame({'count':top10_columns}, index=top10_index)
layout_top10 = {
    'title':{
        'text':'<b>Popular Nationalities</b>',
        'x':0.5,
        'font':{
            'size':15,
            'color':'black'
        }
    },
    'xaxis':{
        'title':"Country"
    },
    'yaxis':{
        'title':'Count',
        'dtick':250
    }

}

# top10_df.iplot(kind='bar', layout=layout_top10)

# 오버롤 상위 100명이 속한 팀은??
teams_df = data.sort_values(by='overall', ascending=False).head(100)
teams_df = teams_df['club_name'].value_counts().head(10)
fig = go.Figure()
fig.add_trace(go.Bar(
    x = teams_df.index,
    y = teams_df.values
))
fig.update_layout(
    xaxis=dict(
        tickangle=-90
    ),
    title={
        'text':'Top 100 Players by club',
        'x':0.5,
        'font':{
            'size':20
        }
    }
)
# fig.show()

# 상관관계 히트맵
corr_matrix = data[['international_reputation', 'value_eur', 'wage_eur', 'overall', 'potential', 'skill_moves', 'age', 'height_cm','weight_kg']].corr()
# corr_matrix.iplot(kind='heatmap', colorscale='RdBu', zmin=-1, zmax=1, title='Correlation Matrix Heatmap')

heatmap = go.Heatmap(
    z=corr_matrix.values,  # 상관계수 값
    x=corr_matrix.columns,  # 열 이름
    y=corr_matrix.columns,  # 행 이름
    colorscale='RdBu',
    zmin=-1, 
    zmax=1,
    text=corr_matrix.values,  # 텍스트로 표시할 상관계수 값
    hoverinfo='text'
)

# 상관계수 값을 표시하기 위한 annotations 생성
annotations = []
for i in range(len(corr_matrix)):
    for j in range(len(corr_matrix.columns)):
        annotations.append(
            go.layout.Annotation(
                text=str(round(corr_matrix.values[i][j], 2)),  # 상관계수를 텍스트로 표시
                x=corr_matrix.columns[j],
                y=corr_matrix.columns[i],
                xref='x1', yref='y1',
                showarrow=False,
                font=dict(color='black')
            )
        )

# 레이아웃 설정
layout = go.Layout(
    title='Correlation Matrix Heatmap',
    annotations=annotations
)

# Figure 생성
fig = go.Figure(data=[heatmap], layout=layout)

# 그래프 출력
fig.show()


Columns (25,108) have mixed types. Specify dtype option on import or set low_memory=False.

