In [15]:
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.sql import text
import pymysql , subprocess , time
from sshtunnel import SSHTunnelForwarder
from sqlalchemy.exc import SQLAlchemyError
import settings

# 現在の作業ディレクトリの絶対パスを取得
import os
current_directory = os.getcwd()

# SSH接続情報
ssh_host = settings.SSH_HOST
ssh_port = settings.SSH_PORT  # デフォルトのSSHポート
ssh_user = settings.SSH_USER
ssh_key = current_directory + settings.SSH_KEY
print(ssh_host , ssh_port , ssh_user , ssh_key)

#port
local_port = 3307  # 任意のローカルポート
remote_port = 3306  # リモートMySQLサーバーのポート

# データベースの接続情報を設定
db_user = settings.DB_USER
db_password = settings.DB_PASSWORD
db_host = settings.DB_HOST
db_port = settings.DB_PORT
db_name = settings.DB_NAME

# 集計期間と取得サイト
collect_term=["2023-01-01" , "2024-12-31" ]
siteid_list=[443, 427 , 486 , 423 , 477 ,483 , 484 , 486]

# SSHトンネルを通す
def ssh_request():

    # SSHトンネルを作成
    ssh_command = [
        'ssh',
        '-i', ssh_key,
        '-L', f'{local_port}:127.0.0.1:{remote_port}',
        '-N',  # コマンドを実行せずにフォアグラウンドで実行
        '-f',  # バックグラウンドで実行
        '-p', str(ssh_port),
        f'{ssh_user}@{ssh_host}'
    ]

    # SSHトンネルの開始
    subprocess.run(ssh_command, check=True)
    print(f"SSH tunnel established on local port {local_port}")
    
    return


#DB　接続
def db_connect(db_user,db_password,local_port,db_name , site_id , startday , endday ):

    # データベース接続エンジンの作成
    engine = create_engine(f'mysql+pymysql://{db_user}:{db_password}@127.0.0.1:{local_port}/{db_name}')
    print("Database connection established.")

    #DB取得クエリ
    subscribe_query = text("""
            SELECT *
            FROM swan_analyze.analyze_ppv_all
            WHERE public_flg =1 AND site_id=:site_id AND date >=:start_day AND date <=:end_day
            """)
    
    #ここでエンジンを使用してデータベース操作を行います
    with engine.connect() as connection:
        result = connection.execute(subscribe_query , {"site_id":site_id , "start_day":startday , "end_day":endday})
        print(result)

        return result

        except SQLAlchemyError as e:
        print(f"An error occurred: {e}")

    
    

for s in siteid_list[5:6]:
    try:
        ssh_request()

        # SSHトンネルが確立されるまで少し待つ
        time.sleep(5)
        data = db_connect(db_user,db_password,local_port,db_name, s, collect_term[0] , collect_term[1] )


        # クエリの結果をPandas DataFrameに変換
        columns = data.keys()
        df = pd.DataFrame(result.fetchall(), columns=columns)


SyntaxError: incomplete input (711554982.py, line 91)

In [10]:
#エラーをチェック
print(df.isnull().sum())
print(len(df))

#DBデータをエキスポート
df.to_csv('ppv_history_origin.csv' ,  encoding='utf-8_sig')

site_id         0
carrier_id      0
member_id       0
uid             0
menuid          0
serviceid       0
start_date      0
end_date        0
payment_type    0
price           0
charge          0
public_flg      0
date            0
dtype: int64
8546


In [13]:
###### 加工データの作成

### データ生成

#全購入金額
df_total_price = df[['member_id' , 'price']].groupby('member_id').sum().reset_index()
#print(df_total_price)

#商品購入リスト
data_item = df.groupby('member_id')['menuid'].apply(list).reset_index()
#print(data_item)

#日付　リスト
data_inflow = df.groupby('member_id')['start_date'].apply(list).reset_index()
#print(data_inflow)

### 上記のデータを元データにマージ
df_add = pd.merge(df , df_total_price, on=['member_id'] , how='outer')
df_add = pd.merge(df_add , data_item, on=['member_id'] , how='outer')
df_add = pd.merge(df_add , data_inflow, on=['member_id'] , how='outer')

# 列名を変更
df_add = df_add.rename(columns={
    'price_y': 'total_sale',
    'menuid_y':'ppv_history' ,
    'start_date_y':'date_history' ,
    })


### 不要データの整理

#必要なカラムのみ抽出
column_list = ['site_id', 'member_id' , 'total_sale' , 'ppv_history', 'date_history'] #,'date_history' ]
df_add = df_add[column_list]

#重複データの削除
df_add = df_add.drop_duplicates(subset=['member_id'])

#6000円以上の人を抽出
df_add = df_add.query(' 6000 < total_sale ') 


print(df_add.tail(50) , len(df_add) )

#加工データをエキスポート
df_add.to_csv('ppv_processing.csv' ,  encoding='utf-8_sig')

      site_id                         member_id  total_sale  \
8033      483  ec6b9b8fab9ed3bf15cf5c97c0613546        9000   
8037      483  ec919cb134c42ee153006fbab5fb532f       12600   
8046      483  ecb6ae9dbcf2026a1f08c48c43d8409f       44200   
8076      483  eddbf44d686cc3d85dd8de1b9c59e6a0        7000   
8080      483  eddfcc7a417552ad272d908df8c0fb7d        8700   
8085      483  edf7c4734ccd5259dec650ca22c73655       12800   
8093      483  ee28b68c39945141903940344f62efe3        7500   
8096      483  ee45c4175f4725e82ec3a0910217bff4        6200   
8103      483  eeb8a81b319088272da0ba07e4d1dbeb        9500   
8107      483  eec0fd4eb66f16f6a6d74f3840a69032        8500   
8118      483  efb13c38f8666422530de370ea0423f5       13500   
8124      483  f05b84f366256c82f2918f188692571b        8800   
8132      483  f1111a0d83a65658692660f4ace1a288        7500   
8146      483  f1d23b04ed414106925a0bea2aacd48b        6700   
8149      483  f1fd60b994a4644a714236f4cda61739       3

Unnamed: 0,site_id,carrier_id,member_id,uid,menuid,serviceid,start_date,end_date,payment_type,price,charge,public_flg,date
0,483,6,c1dace7fae7bc015acede89594470760,0001GJhyHQkVnWK0LfZpoXcG0MOVc,007,00073734512,2024-01-01 00:21:07,0000-00-00 00:00:00,1,2000,2000,1,2024-01-01
1,483,6,b0ea90aff78d1d0d72fea66af07f6380,0001GNBBkXR7rpU7mBuNorNDDHOZ4,014,00073734512,2024-01-01 00:43:49,0000-00-00 00:00:00,1,1200,1200,1,2024-01-01
2,483,6,e19c56f028800424e2b92a0fe3b0144d,0001GNtOnrRsmfa5m02XFEWiQNOaK,002,00073734512,2024-01-01 01:23:05,0000-00-00 00:00:00,1,3000,3000,1,2024-01-01
3,483,6,e19c56f028800424e2b92a0fe3b0144d,0001GNtOnrRsmfa5m02XFEWiQNOaK,009,00073734512,2024-01-01 01:34:27,0000-00-00 00:00:00,1,2500,2500,1,2024-01-01
4,483,6,421808c1273e07e304adc59ccf0461cc,0001GJq6Hdf0fVQ0Crg6P0jSNoObO,006,00073734512,2024-01-01 01:46:57,0000-00-00 00:00:00,1,2000,2000,1,2024-01-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...
8072,483,110,769443f13d0a4f398d9db0e0823b38a9,MKB_00057139,818,,2024-11-30 21:22:55,0000-00-00 00:00:00,2,1500,1500,1,2024-11-30
8073,483,110,769443f13d0a4f398d9db0e0823b38a9,MKB_00057139,606,,2024-11-30 21:35:27,0000-00-00 00:00:00,2,3000,3000,1,2024-11-30
8074,483,110,769443f13d0a4f398d9db0e0823b38a9,MKB_00057139,011,,2024-11-30 22:54:01,0000-00-00 00:00:00,2,2000,2000,1,2024-11-30
8075,483,110,65dddb6776f94d448073e3383bf66f6d,amzn1.account.AGVNB23XM35665YN7PRALN2ZMKUA,011,,2024-11-30 23:13:21,0000-00-00 00:00:00,6,2000,2000,1,2024-11-30
