In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
# from google.colab import drive
# drive.mount('/content/drive')
data = pd.read_csv("./crypto-markets.csv", delimiter=',')

In [4]:
# 各暗号通貨ごとに記述統計を生成し、TOP10を表示
cryptos = data['symbol'].unique()

# 各暗号通貨ごとの記述統計を生成し、指定したジャンルに基づいてトップ10を表示する関数
def display_top_cryptos(data, column, metric, top_n=10):
    cryptos = data['symbol'].unique()
    stats_list = []

    for crypto in cryptos:
        crypto_data = data[data['symbol'] == crypto]
        if not crypto_data.empty:
            # 主な変数の記述統計を生成
            stats = crypto_data.describe()
            value = stats.loc[metric, column]
            stats_list.append({'symbol': crypto, metric: value})

    stats_df = pd.DataFrame(stats_list)
    top_cryptos = stats_df.nlargest(top_n, metric)

    print(f"\nTop {top_n} Cryptocurrencies by {metric} of {column}:")
    print(top_cryptos)

# 平均値に基づいてトップ10の暗号通貨を表示
display_top_cryptos(data, column='close', metric='mean', top_n=10)

# 最大値に基づいてトップ10の暗号通貨を表示
display_top_cryptos(data, column='close', metric='max', top_n=10)

# 最小値に基づいてトップ10の暗号通貨を表示
display_top_cryptos(data, column='close', metric='min', top_n=10)

# 標準偏差に基づいてトップ10の暗号通貨を表示
display_top_cryptos(data, column='close', metric='std', top_n=10)


Top 10 Cryptocurrencies by mean of close:
       symbol           mean
2003    BTWTY  393660.937914
2004    NANOX  169922.649835
2002       42   16060.952332
1196      RMC    7749.223071
1172      PBT    4001.037090
1355   BITBTC    3288.321170
1256     IDXM    2479.137166
0         BTC    2287.287272
1836     CEFS    1521.566615
1362  BITGOLD    1356.802963

Top 10 Cryptocurrencies by max of close:
      symbol         max
2004   NANOX  2300740.00
2003   BTWTY  1569870.00
2002      42   793273.00
1196     RMC    48759.40
1355  BITBTC    21139.30
0        BTC    19497.40
1836    CEFS    13053.40
1256    IDXM    11037.10
1172     PBT     9582.44
1914     TER     4852.15

Top 10 Cryptocurrencies by min of close:
       symbol       min
2003    BTWTY  46807.40
2004    NANOX  20724.80
1172      PBT   1425.32
1362  BITGOLD    762.91
1736     FOIN    498.91
1196      RMC    384.03
1836     CEFS    315.19
1256     IDXM    200.35
1355   BITBTC    180.90
4         BCH    179.06

Top 10 Cryptoc

In [5]:
# symbolカラムのユニークな種類の数
unique_symbols_count = data['symbol'].nunique()
print(f'Number of unique symbols: {unique_symbols_count}')

# symbolカラムのユニークな種類のリストを表示
unique_symbols = data['symbol'].unique()

# Pandasの設定を変更してすべてのユニークシンボルを表示
pd.set_option('display.max_rows', None)
unique_symbols_df = pd.DataFrame(unique_symbols, columns=['Unique Symbols'])
print(unique_symbols_df)

Number of unique symbols: 2005
     Unique Symbols
0               BTC
1               XRP
2               ETH
3               XLM
4               BCH
5               EOS
6               LTC
7              USDT
8               BSV
9               ADA
10              XMR
11              TRX
12            MIOTA
13             DASH
14              XEM
15              BNB
16              NEO
17              ETC
18              ZEC
19              BTG
20              XTZ
21              MKR
22              VET
23             DOGE
24              ONT
25              ZRX
26              OMG
27              BAT
28             TUSD
29             QTUM
30             USDC
31              DCR
32              LSK
33              PAX
34              BCD
35              BCN
36              DGB
37              ZIL
38             NANO
39               AE
40              BTS
41              ICX
42              AOA
43            WAVES
44             LINK
45              XVG
46               SC
47       

In [6]:
def plot_top_cryptocurrencies(data, top_n=10):
    # 暗号通貨ごとにグループ化し、価格と取引量の平均を計算
    grouped_data = data.groupby('symbol').agg({
        'close': 'mean',
        'volume': 'mean'
    }).reset_index()

    # 価格のトップNの暗号通貨
    top_price_cryptos = grouped_data.nlargest(top_n, 'close')

    # 取引量のトップNの暗号通貨
    top_volume_cryptos = grouped_data.nlargest(top_n, 'volume')

    # トップNの暗号通貨の価格の時系列プロット
    top_price_data = data[data['symbol'].isin(top_price_cryptos['symbol'])]
    fig = px.line(top_price_data, x='date', y='close', color='symbol', title=f'Top {top_n} Cryptocurrencies by Price Over Time')
    fig.show()

    # トップNの暗号通貨の取引量のヒストグラム
    top_volume_data = data[data['symbol'].isin(top_volume_cryptos['symbol'])]
    fig = px.histogram(top_volume_data, x='volume', color='symbol', title=f'Distribution of Daily Trading Volume for Top {top_n} Cryptocurrencies by Volume')
    fig.show()

    # トップNの暗号通貨の価格と取引量の散布図
    fig = px.scatter(top_price_data, x='close', y='volume', color='symbol', title=f'Price vs. Volume for Top {top_n} Cryptocurrencies by Price')
    fig.show()

    # トップNの暗号通貨の取引量の時系列プロット
    fig = px.line(top_volume_data, x='date', y='volume', color='symbol', title=f'Top {top_n} Cryptocurrencies by Volume Over Time')
    fig.show()

    # トップNの暗号通貨の取引量のヒストグラム
    fig = px.histogram(top_volume_data, x='volume', color='symbol', title=f'Distribution of Daily Trading Volume for Top {top_n} Cryptocurrencies by Volume')
    fig.show()

    # トップNの暗号通貨の価格と取引量の散布図
    fig = px.scatter(top_volume_data, x='close', y='volume', color='symbol', title=f'Price vs. Volume for Top {top_n} Cryptocurrencies by Volume')
    fig.show()

# 関数を呼び出してグラフを表示
plot_top_cryptocurrencies(data, top_n=10)

In [7]:
# 各暗号通貨のリターンを計算
data['Return'] = data.groupby('symbol')['close'].pct_change()

# 各暗号通貨のリターン、平均リターン、標準偏差、シャープレシオの計算
stats = data.groupby('symbol')['Return'].agg(['mean', 'std']).reset_index()
stats['Sharpe Ratio'] = stats['mean'] / stats['std']

# 各指標ごとにトップ10を出力する関数
def display_top_10(stats, metric):
    top_10 = stats.nlargest(10, metric)
    print(f"\nTop 10 Cryptocurrencies by {metric}:")
    print(top_10)

# 結果を表示
print("\nAll statistics:")
print(stats)

# 各指標ごとにトップ10を表示
display_top_10(stats, 'mean')
display_top_10(stats, 'std')
display_top_10(stats, 'Sharpe Ratio')

# # 各暗号通貨ごとに結果を出力
# for index, row in stats.iterrows():
#     print(f"\nSymbol: {row['symbol']}")
#     print(f"Mean Return: {row['mean']}")
#     print(f"Standard Deviation: {row['std']}")
#     print(f"Sharpe Ratio: {row['Sharpe Ratio']}")


All statistics:
         symbol          mean           std  Sharpe Ratio
0           $$$  3.325510e-02      0.285927  1.163061e-01
1          $PAC  4.079123e-01     12.730908  3.204110e-02
2         0XBTC -2.328136e-03      0.124455 -1.870669e-02
3          1337  5.844782e-02      0.487101  1.199911e-01
4           1ST  4.531843e-03      0.132823  3.411936e-02
5           1WO  6.377376e-03      0.171863  3.710726e-02
6         2GIVE  1.168436e-02      0.142893  8.176979e-02
7           2GO -7.782793e-02      0.180259 -4.317557e-01
8           300  1.196637e-02      0.182582  6.553960e-02
9            42  4.093790e-02      1.264915  3.236415e-02
10          611  4.156417e-02      0.362490  1.146630e-01
11          808  3.506431e-02      0.350372  1.000774e-01
12         8BIT  2.145770e-01      6.908748  3.105874e-02
13          AAA  2.242900e-01      1.227133  1.827756e-01
14          AAC -9.169789e-03      0.084442 -1.085921e-01
15         ABBC -2.347369e-02      0.194611 -1.206183e-

In [8]:
# 各暗号通貨ごとにランダムフォレストモデルを使用して予測を行う
results = []

cryptos = data['symbol'].unique()
for crypto in cryptos:
    crypto_data = data[data['symbol'] == crypto]

    if crypto_data.shape[0] > 1:  # データが少なくとも2行以上ある場合
        # 特徴量とターゲットの選定
        features = crypto_data[['volume', 'market']]
        target = crypto_data['close']

        # 欠損値の除去
        features = features.dropna()
        target = target[features.index]

        if features.shape[0] > 1:  # 特徴量が少なくとも2行以上ある場合
            # 訓練データとテストデータに分割
            X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

            # モデルの訓練
            model = RandomForestRegressor(n_estimators=100, random_state=42)
            model.fit(X_train, y_train)

            # 予測と評価
            predictions = model.predict(X_test)
            mse = mean_squared_error(y_test, predictions)

            # 結果を保存
            results.append({
                'symbol': crypto,
                'mean_squared_error': mse
            })

# 結果のデータフレームを作成
results_df = pd.DataFrame(results)

# MSEが小さい順に並べ替えてトップ10を選択
top_10_results = results_df.nsmallest(10, 'mean_squared_error')

# トップ10の結果を表示
print(top_10_results)

# 各暗号通貨ごとに結果を表示
for index, row in top_10_results.iterrows():
    print(f"Symbol: {row['symbol']}, Mean Squared Error: {row['mean_squared_error']}")

     symbol  mean_squared_error
1083   SUBX        1.417604e-16
1286    BUN        2.577150e-14
1404    BBS        2.812421e-13
1056    NDX        4.971273e-13
1201    GCN        5.408343e-13
1118  MICRO        5.438000e-13
1081    FBN        6.438000e-13
1141   DATP        6.477444e-13
1686   VPRC        7.077255e-13
1536    EZW        7.254750e-13
Symbol: SUBX, Mean Squared Error: 1.4176039083560798e-16
Symbol: BUN, Mean Squared Error: 2.5771496174333895e-14
Symbol: BBS, Mean Squared Error: 2.8124210526315987e-13
Symbol: NDX, Mean Squared Error: 4.97127272727263e-13
Symbol: GCN, Mean Squared Error: 5.408342928375735e-13
Symbol: MICRO, Mean Squared Error: 5.438000000000001e-13
Symbol: FBN, Mean Squared Error: 6.438000000000117e-13
Symbol: DATP, Mean Squared Error: 6.477444444444463e-13
Symbol: VPRC, Mean Squared Error: 7.077254863588799e-13
Symbol: EZW, Mean Squared Error: 7.25475000000001e-13
