# python stock

In [4]:
from datetime import datetime, date
from pathlib import Path

import matplotlib.pyplot as plt
import mplfinance as mpf
import numpy as np
import pandas as pd
import yfinance as yf

In [2]:
pd.set_option('expand_frame_repr', False)  # do not wrap wide frames onto multiple lines
pd.set_option('display.max_rows', 5000)  # maximum number of rows to display

### Get a stock's historical data

In [5]:
# Download historical market data for GFS
# ===== Load the stock data
ticker = "GFS"
# Start and end dates for the request (adjust if needed)
start_date = datetime(2021, 1, 1)
# NOTE: evaluated at run time, so the requested range grows on every re-run
end_date = date.today()

# Fetch all trading data in that range from Yahoo Finance
df = yf.download(ticker, start=start_date, end=end_date)

[*********************100%***********************]  1 of 1 completed


In [10]:
# Dimensions of df as (rows, columns)
df.shape

(350, 6)

### Write the data to an HDF5 file

In [15]:
# Cache the frame in an HDF5 file under key 'df'; mode='w' overwrites any
# existing file. The target directory is created first so the write also works
# on a fresh checkout where ./data does not exist yet.
Path('./data').mkdir(parents=True, exist_ok=True)
df.to_hdf('./data/gfs.h5', key='df', mode='w')

In [5]:
# Reload the frame stored under key 'df' in the HDF5 file, replacing the in-memory df
df = pd.read_hdf('./data/gfs.h5', 'df')

### Data wrangling

In [29]:
# Preview the first two rows
df.head(2)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-10-28 00:00:00-04:00,47.009998,48.0,44.48,46.400002,46.400002,18618900
2021-10-29 00:00:00-04:00,46.0,49.48,45.610001,48.740002,48.740002,4639400


In [38]:
# Summarize df: index range, column dtypes, non-null counts and memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 350 entries, 2021-10-28 00:00:00-04:00 to 2023-03-21 00:00:00-04:00
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Open       350 non-null    float64
 1   High       350 non-null    float64
 2   Low        350 non-null    float64
 3   Close      350 non-null    float64
 4   Adj Close  350 non-null    float64
 5   Volume     350 non-null    int64  
dtypes: float64(5), int64(1)
memory usage: 19.1 KB


In [27]:
# Optional cleaning steps -- currently disabled (every statement below is commented out).
# reset index
# df.reset_index(inplace=True)
# # sort by date and drop duplicate dates
# df.sort_values(by=['Date'], inplace=True)
# df.drop_duplicates(subset=['Date'], inplace=True)
# df.reset_index(inplace=True, drop=True)
# # replace the space in the original column title with "_"
# df['Adj_Close'] = df['Adj Close']
# df.drop(['Adj Close'], axis=1, inplace=True)
# df.set_index('Date',inplace=True)

In [39]:
# Write df back to the HDF5 file under key 'df', overwriting it (mode='w').
# NOTE: the cleaning statements in the preceding cell are all commented out,
# so no cleaning has been applied to df at this point.
df.to_hdf('./data/gfs.h5', key='df', mode='w')

### 输出该股票，收盘比开盘上涨3%以上的日期

In [6]:
# Boolean mask of sessions that closed more than 3% above their open:
# (Close - Open) / Open > 0.03
filt = df['Close'].sub(df['Open']).div(df['Open']).gt(0.03)
filt

In [7]:
# Get the dates (index entries) of the rows selected by the mask
df[filt].index

DatetimeIndex(['2021-10-29 00:00:00-04:00', '2021-11-03 00:00:00-04:00',
               '2021-11-04 00:00:00-04:00', '2021-11-15 00:00:00-05:00',
               '2021-11-16 00:00:00-05:00', '2021-11-24 00:00:00-05:00',
               '2021-12-03 00:00:00-05:00', '2021-12-27 00:00:00-05:00',
               '2022-01-11 00:00:00-05:00', '2022-01-24 00:00:00-05:00',
               '2022-01-28 00:00:00-05:00', '2022-01-31 00:00:00-05:00',
               '2022-02-04 00:00:00-05:00', '2022-02-07 00:00:00-05:00',
               '2022-02-08 00:00:00-05:00', '2022-02-10 00:00:00-05:00',
               '2022-02-24 00:00:00-05:00', '2022-02-25 00:00:00-05:00',
               '2022-02-28 00:00:00-05:00', '2022-03-10 00:00:00-05:00',
               '2022-03-15 00:00:00-04:00', '2022-03-16 00:00:00-04:00',
               '2022-03-17 00:00:00-04:00', '2022-03-18 00:00:00-04:00',
               '2022-03-24 00:00:00-04:00', '2022-03-25 00:00:00-04:00',
               '2022-04-25 00:00:00-04:00', '2022-0

### 输出该股票，开盘比前一周期收盘 下跌2%以上的日期

In [67]:
# Boolean mask of sessions that opened more than 2% below the previous close:
# (Open - previous Close) / previous Close < -0.02
# The first row has no previous close (NaN), so it compares as False.
prev_close = df['Close'].shift(1)
filt = (df['Open'] - prev_close) / prev_close < -0.02
# filt

In [66]:
# Dates (index entries) of the rows selected by the current mask
df[filt].index

DatetimeIndex(['2021-11-26 00:00:00-05:00', '2021-12-17 00:00:00-05:00',
               '2021-12-20 00:00:00-05:00', '2022-01-10 00:00:00-05:00',
               '2022-01-21 00:00:00-05:00', '2022-01-24 00:00:00-05:00',
               '2022-01-25 00:00:00-05:00', '2022-02-03 00:00:00-05:00',
               '2022-02-24 00:00:00-05:00', '2022-02-28 00:00:00-05:00',
               '2022-03-10 00:00:00-05:00', '2022-03-17 00:00:00-04:00',
               '2022-04-06 00:00:00-04:00', '2022-04-11 00:00:00-04:00',
               '2022-05-09 00:00:00-04:00', '2022-05-11 00:00:00-04:00',
               '2022-05-12 00:00:00-04:00', '2022-05-16 00:00:00-04:00',
               '2022-05-18 00:00:00-04:00', '2022-05-23 00:00:00-04:00',
               '2022-05-24 00:00:00-04:00', '2022-06-03 00:00:00-04:00',
               '2022-06-07 00:00:00-04:00', '2022-06-10 00:00:00-04:00',
               '2022-06-13 00:00:00-04:00', '2022-06-16 00:00:00-04:00',
               '2022-06-22 00:00:00-04:00', '2022-0

# 假如从GFS上市开始，每月第一个交易日买入100股，每年最后一个交易日卖出所有股票，计算到目前为止的收益。

In [68]:
# Simulation plan
# - Window: 2022-01 through 2023-03
# - Buy: 100 shares on the first trading day of every month
#   - whole year 2022: 12*100 = 1,200 shares
# - Sell: all 1,200 shares on the last trading day of 2022
# - Shares bought in 2023 (3*100 = 300) are not sold; they are valued at the last Close
# - Price for buys and the sale: df['Open']

In [85]:
# Restrict the simulation to January 2022 through March 2023 using a
# partial-string slice on the DatetimeIndex. Rows from 2021 are excluded.
new_df = df.loc['2022-01':'2023-03']

In [86]:
# Buy leg: take the first row of each calendar month in new_df; its 'Open'
# value is used as the purchase price for that month.
# The result is labelled with month-end dates (see the output), not with the
# actual first trading day.
# NOTE(review): newer pandas releases may warn that the "M" alias is
# deprecated in favour of "ME" -- confirm against the installed version.
df_monthly = new_df.resample("M").first() # resample the data into monthly bins
df_monthly

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-01-31 00:00:00-05:00,65.089996,67.389999,64.349998,66.730003,66.730003,1766500
2022-02-28 00:00:00-05:00,49.830002,50.720001,49.458,50.09,50.09,1587000
2022-03-31 00:00:00-04:00,60.5,60.639999,57.599998,58.220001,58.220001,1800100
2022-04-30 00:00:00-04:00,62.799999,63.529999,59.48,60.740002,60.740002,2951100
2022-05-31 00:00:00-04:00,52.060001,53.860001,50.0,53.77,53.77,1913500
2022-06-30 00:00:00-04:00,59.490002,61.880001,56.07,58.119999,58.119999,2554100
2022-07-31 00:00:00-04:00,39.48,39.5,37.599998,38.810001,38.810001,3541500
2022-08-31 00:00:00-04:00,51.18,58.279999,51.115002,54.540001,54.540001,6242900
2022-09-30 00:00:00-04:00,59.169998,59.174999,55.799999,57.43,57.43,2405300
2022-10-31 00:00:00-04:00,48.880001,49.830002,47.439999,49.830002,49.830002,2024500


In [87]:
# Total outlay: 100 shares bought at each month's opening price
cost = 100 * df_monthly['Open'].sum()
cost

84985.99967956543

In [88]:
# Sell leg: take the last row of each calendar year in new_df.
# The final bin (2023) holds shares that have not been sold, so it is dropped
# by position, leaving only the completed years.
df_yearly = new_df.resample("A").last().iloc[:-1]
df_yearly

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-12-31 00:00:00-05:00,53.029999,54.005001,52.450001,53.889999,53.889999,1246300


In [89]:
# Cash received from the sale: 1,200 shares at the year-end row's Open price
resv = 1200 * df_yearly['Open'].sum()
resv

63635.99853515625

In [91]:
# Value of the shares still held in 2023.
# The 300 unsold shares are marked at the most recent Close in the window.
# .iloc[-1] selects the last row by position explicitly; a bare [-1] on a
# Series with a DatetimeIndex relies on the deprecated positional fallback.
last = 300 * new_df['Close'].iloc[-1]

In [92]:
# Net result: sale proceeds + value of the remaining shares - total purchase cost
resv + last - cost

-698.0022430419922

In [8]:
# Scratch series for the shift() demo: five random integers in 1..9
# (the upper bound 10 is exclusive). Unseeded, so values differ per run.
exam = pd.Series(np.random.randint(low=1, high=10, size=5))

In [9]:
# Display the sample series
exam

0    2
1    1
2    4
3    2
4    6
dtype: int64

In [11]:
# shift(-1) moves every value up one position; the last slot becomes NaN and
# the result is float (see the output)
exam.shift(-1)

0    1.0
1    4.0
2    2.0
3    6.0
4    NaN
dtype: float64