In [13]:
import pandas as pd

# Load the Steam games dataset from games.csv in the working directory.
steam = pd.read_csv('games.csv', encoding='utf-8')

# DataFrame.info() writes its summary straight to stdout and returns None,
# so wrapping it in print() would only append a stray "None" line.
steam.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 85103 entries, 0 to 85102
Data columns (total 39 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   AppID                       85103 non-null  int64  
 1   Name                        85097 non-null  object 
 2   Release date                85103 non-null  object 
 3   Estimated owners            85103 non-null  object 
 4   Peak CCU                    85103 non-null  int64  
 5   Required age                85103 non-null  int64  
 6   Price                       85103 non-null  float64
 7   DLC count                   85103 non-null  int64  
 8   About the game              81536 non-null  object 
 9   Supported languages         85103 non-null  object 
 10  Full audio languages        85103 non-null  object 
 11  Reviews                     9743 non-null   object 
 12  Header image                85103 non-null  object 
 13  Website                     394

In [21]:
# Report the dtype of every column under a banner line.
divider = "#" * 10
print(divider)
print("데이터 타입 확인")
print("Data Types:\n", steam.dtypes)

# Partition the column labels by dtype family so that later cells can analyse
# text-like and numeric columns separately. Columns of any other dtype
# (for example bool) end up in neither index.
categorical_cols = steam.select_dtypes(include=['object', 'category']).columns
numerical_cols = steam.select_dtypes(include=['int64', 'float64']).columns

# Show each group of column labels under its own banner.
for section_title, section_label, section_cols in (
    ("범주형 데이터 분리하여 분석", "\nCategorical Columns:\n", categorical_cols),
    ("수치형 데이터 분리하여 분석", "\nNumerical Columns:\n", numerical_cols),
):
    print(divider)
    print(section_title)
    print(section_label, section_cols)

##########
데이터 타입 확인
Data Types:
 AppID                           int64
Name                           object
Release date                   object
Estimated owners               object
Peak CCU                        int64
Required age                    int64
Price                         float64
DLC count                       int64
About the game                 object
Supported languages            object
Full audio languages           object
Reviews                        object
Header image                   object
Website                        object
Support url                    object
Support email                  object
Windows                          bool
Mac                              bool
Linux                            bool
Metacritic score                int64
Metacritic url                 object
User score                      int64
Positive                        int64
Negative                        int64
Score rank                    float64
Achievements    

In [22]:
# Count the missing entries in each column.
null_counts = steam.isna().sum()
print("\nMissing Values:\n", null_counts)

# For every categorical column, list its distinct values with their frequencies.
for col in categorical_cols:
    frequencies = steam[col].value_counts()
    print(f"\nUnique values in {col}:\n", frequencies)

# Basic descriptive statistics for the numeric columns.
numeric_summary = steam[numerical_cols].describe()
print("\nDescriptive Statistics for Numerical Data:\n", numeric_summary)


Missing Values:
 AppID                             0
Name                              6
Release date                      0
Estimated owners                  0
Peak CCU                          0
Required age                      0
Price                             0
DLC count                         0
About the game                 3567
Supported languages               0
Full audio languages              0
Reviews                       75360
Header image                      0
Website                       45651
Support url                   43511
Support email                 13593
Windows                           0
Mac                               0
Linux                             0
Metacritic score                  0
Metacritic url                81191
User score                        0
Positive                          0
Negative                          0
Score rank                    85059
Achievements                      0
Recommendations                   0
Notes     


Unique values in Metacritic url:
 https://www.metacritic.com/game/pc/shadow-of-the-tomb-raider?ftag=MCD-06-10aaa1f                     20
https://www.metacritic.com/game/pc/crazy-machines-the-wacky-contraptions-game?ftag=MCD-06-10aaa1f     4
https://www.metacritic.com/game/pc/call-of-duty-black-ops-iii?ftag=MCD-06-10aaa1f                     3
https://www.metacritic.com/game/pc/brink?ftag=MCD-06-10aaa1f                                          3
https://www.metacritic.com/game/pc/the-sims-3?ftag=MCD-06-10aaa1f                                     3
                                                                                                     ..
https://www.metacritic.com/game/pc/gods-trigger?ftag=MCD-06-10aaa1f                                   1
https://www.metacritic.com/game/pc/light-of-altair?ftag=MCD-06-10aaa1f                                1
https://www.metacritic.com/game/pc/poker-night-at-the-inventory?ftag=MCD-06-10aaa1f                   1
http://www.metacritic.com/gam


Descriptive Statistics for Numerical Data:
               AppID       Peak CCU  Required age         Price     DLC count  \
count  8.510300e+04   85103.000000  85103.000000  85103.000000  85103.000000   
mean   1.355681e+06     134.872930      0.312774      7.193703      0.543412   
std    6.949952e+05    5403.548851      2.254721     12.362478     13.721223   
min    1.000000e+01       0.000000      0.000000      0.000000      0.000000   
25%    7.723900e+05       0.000000      0.000000      0.990000      0.000000   
50%    1.331540e+06       0.000000      0.000000      4.490000      0.000000   
75%    1.918415e+06       1.000000      0.000000      9.990000      0.000000   
max    2.765800e+06  872138.000000     21.000000    999.980000   2366.000000   

       Metacritic score    User score      Positive       Negative  \
count      85103.000000  85103.000000  8.510300e+04   85103.000000   
mean           3.348366      0.039822  9.585609e+02     159.772570   
std           15.421471 

In [23]:
# Skewness and kurtosis of the numeric columns
"""
Skewness: the closer to 0, the closer to a normal distribution; a positive value means a long right tail (mass concentrated on the left), a negative value means a long left tail (mass concentrated on the right)
Kurtosis: the closer to 0, the closer to a normal distribution; a high value means a sharply peaked distribution, a low value means a flat one
"""
# Select the numeric columns once and reuse the sub-frame for both statistics.
shape_stats_frame = steam[numerical_cols]
print("\nSkewness of the data:\n", shape_stats_frame.skew())
print("\nKurtosis of the data:\n", shape_stats_frame.kurtosis())


Skewness of the data:
 AppID                           0.100744
Peak CCU                      116.363297
Required age                    7.163561
Price                          23.006303
DLC count                     121.279274
Metacritic score                4.475404
User score                     46.691892
Positive                      165.795226
Negative                      150.267524
Score rank                     -0.283325
Achievements                   27.065955
Recommendations               109.446324
Average playtime forever       58.969881
Average playtime two weeks     45.015460
Median playtime forever        79.548322
Median playtime two weeks      41.805559
dtype: float64

Kurtosis of the data:
 AppID                            -1.047405
Peak CCU                      16368.390342
Required age                     49.816737
Price                          1521.615642
DLC count                     17698.544563
Metacritic score                 18.396785
User score             

In [25]:
# Pairwise correlation matrices of the numeric columns: Pearson first,
# then Spearman. Each matrix is computed and printed before the next one starts.
corr_frame = steam[numerical_cols]
for corr_label, corr_method in (
    ("Pearson Correlation:\n", 'pearson'),
    ("\nSpearman Correlation:\n", 'spearman'),
):
    print(corr_label, corr_frame.corr(method=corr_method))

Pearson Correlation:
                                AppID  Peak CCU  Required age     Price  \
AppID                       1.000000 -0.014091     -0.101025 -0.054967   
Peak CCU                   -0.014091  1.000000      0.035017  0.039737   
Required age               -0.101025  0.035017      1.000000  0.096285   
Price                      -0.054967  0.039737      0.096285  1.000000   
DLC count                  -0.029680  0.009283      0.015106  0.042934   
Metacritic score           -0.275571  0.059138      0.196990  0.141263   
User score                 -0.017089 -0.000538      0.080111 -0.000718   
Positive                   -0.049009  0.643179      0.068224  0.029271   
Negative                   -0.039741  0.584971      0.064921  0.025202   
Score rank                  0.152725 -0.254030      0.079917  0.281748   
Achievements               -0.059744  0.007287      0.004949  0.006826   
Recommendations            -0.051243  0.517000      0.096702  0.043055   
Average playtime