# 配置环境

In [52]:
import pandas as pd 
import numpy as np
pd.set_option('display.max_columns', None) # None lifts the limit on how many columns are shown when a DataFrame is displayed

In [53]:
import chardet

def check_encoding(filename):
    """Guess the text encoding of a file using chardet.

    Args:
        filename: Path of the file to inspect.

    Returns:
        An ``(encoding, confidence)`` tuple taken from the ``'encoding'`` and
        ``'confidence'`` entries of the dictionary returned by
        ``chardet.detect``.
    """
    # Read the raw bytes inside a context manager so the file handle is
    # closed deterministically, even if read() raises.
    with open(filename, 'rb') as raw_file:
        rawdata = raw_file.read()
    result = chardet.detect(rawdata)  # detection runs on the whole file content
    return result['encoding'], result['confidence']

# Detect the encoding of the CSV file and report it together with chardet's confidence.
# NOTE(review): the path is absolute and specific to one Windows machine; adjust it before running elsewhere.
file_path = r'C:\Users\lenovo\Desktop\000001.csv'# path of the CSV file to inspect
encoding, confidence = check_encoding(file_path)
print(f"Encoding: {encoding}, Confidence: {confidence}")

Encoding: ascii, Confidence: 1.0


# 导入数据

In [54]:
# Load the CSV located by `file_path` (defined in the encoding-detection cell)
# instead of repeating the hard-coded path, and decode it with the encoding
# that check_encoding() reported so the detection result is actually used.
data = pd.read_csv(file_path, encoding=encoding)
data  # last expression of the cell: renders the DataFrame

Unnamed: 0,Day,Preclose,Open,Highest,Lowest,Close
0,1990/12/19,,96.050,99.980,95.790,99.980
1,1990/12/20,99.98,104.300,104.390,99.980,104.390
2,1990/12/21,104.39,109.070,109.130,103.730,109.130
3,1990/12/24,109.13,113.570,114.550,109.130,114.550
4,1990/12/25,114.55,120.090,120.250,114.550,120.250
...,...,...,...,...,...,...
8468,2025/8/25,3825.759,3848.163,3883.562,3839.972,3883.562
8469,2025/8/26,3883.562,3871.471,3888.599,3859.758,3868.382
8470,2025/8/27,3868.382,3869.612,3887.198,3800.350,3800.350
8471,2025/8/28,3800.35,3796.711,3845.087,3761.422,3843.597


In [55]:
print(data.columns.values) # print the DataFrame's column names

['Day' 'Preclose' 'Open' 'Highest' 'Lowest' 'Close']


# 时间序列数据（Time-Series Data）的格式以及数据框（DataFrame）

In [56]:
data[['Day']] # select a single column as a DataFrame; data['Day'] or data.Day also work, but they return a Series rather than a table


Unnamed: 0,Day
0,1990/12/19
1,1990/12/20
2,1990/12/21
3,1990/12/24
4,1990/12/25
...,...
8468,2025/8/25
8469,2025/8/26
8470,2025/8/27
8471,2025/8/28


In [57]:
data[['Day','Preclose']] # select several columns by passing a list of column names

Unnamed: 0,Day,Preclose
0,1990/12/19,
1,1990/12/20,99.98
2,1990/12/21,104.39
3,1990/12/24,109.13
4,1990/12/25,114.55
...,...,...
8468,2025/8/25,3825.759
8469,2025/8/26,3883.562
8470,2025/8/27,3868.382
8471,2025/8/28,3800.35


In [58]:
data[0:9] # select rows with a positional slice: rows 0 through 8 (the end of the slice is exclusive)

Unnamed: 0,Day,Preclose,Open,Highest,Lowest,Close
0,1990/12/19,,96.05,99.98,95.79,99.98
1,1990/12/20,99.98,104.3,104.39,99.98,104.39
2,1990/12/21,104.39,109.07,109.13,103.73,109.13
3,1990/12/24,109.13,113.57,114.55,109.13,114.55
4,1990/12/25,114.55,120.09,120.25,114.55,120.25
5,1990/12/26,120.25,125.27,125.27,120.25,125.27
6,1990/12/27,125.27,125.27,125.28,125.27,125.28
7,1990/12/28,125.28,126.39,126.45,125.28,126.45
8,1990/12/31,126.45,126.56,127.61,126.48,127.61


In [59]:
data.iloc[0:6,0:6] # positional access: rows 0 through 5 (6 rows) and columns 0 through 5 (6 columns); slice ends are exclusive

Unnamed: 0,Day,Preclose,Open,Highest,Lowest,Close
0,1990/12/19,,96.05,99.98,95.79,99.98
1,1990/12/20,99.98,104.3,104.39,99.98,104.39
2,1990/12/21,104.39,109.07,109.13,103.73,109.13
3,1990/12/24,109.13,113.57,114.55,109.13,114.55
4,1990/12/25,114.55,120.09,120.25,114.55,120.25
5,1990/12/26,120.25,125.27,125.27,120.25,125.27


In [60]:
data.at[2,'Highest'] # access one element by row label and column name; .at is meant for fast lookup of a single value

109.13

In [61]:
# Build the row condition and pick the 'Open' column in a single .loc call:
# rows whose 'Day' equals the given date string, column 'Open' only.
data.loc[data['Day'] == "1990/12/21", 'Open']

2    109.07
Name: Open, dtype: float64

# 按时间筛选

In [62]:
# pd.to_datetime() converts strings into datetime objects.
# format='%Y/%m/%d' declares the input layout as year/month/day (e.g. 1990/12/19).
data['Day'] = pd.to_datetime(data['Day'],format = '%Y/%m/%d')
data

Unnamed: 0,Day,Preclose,Open,Highest,Lowest,Close
0,1990-12-19,,96.050,99.980,95.790,99.980
1,1990-12-20,99.98,104.300,104.390,99.980,104.390
2,1990-12-21,104.39,109.070,109.130,103.730,109.130
3,1990-12-24,109.13,113.570,114.550,109.130,114.550
4,1990-12-25,114.55,120.090,120.250,114.550,120.250
...,...,...,...,...,...,...
8468,2025-08-25,3825.759,3848.163,3883.562,3839.972,3883.562
8469,2025-08-26,3883.562,3871.471,3888.599,3859.758,3868.382
8470,2025-08-27,3868.382,3869.612,3887.198,3800.350,3800.350
8471,2025-08-28,3800.35,3796.711,3845.087,3761.422,3843.597


In [63]:
data = data.sort_values(by=['Day'], axis=0, ascending=True) # ascending=True sorts in ascending order; False would sort in descending order
data

Unnamed: 0,Day,Preclose,Open,Highest,Lowest,Close
0,1990-12-19,,96.050,99.980,95.790,99.980
1,1990-12-20,99.98,104.300,104.390,99.980,104.390
2,1990-12-21,104.39,109.070,109.130,103.730,109.130
3,1990-12-24,109.13,113.570,114.550,109.130,114.550
4,1990-12-25,114.55,120.090,120.250,114.550,120.250
...,...,...,...,...,...,...
8468,2025-08-25,3825.759,3848.163,3883.562,3839.972,3883.562
8469,2025-08-26,3883.562,3871.471,3888.599,3859.758,3868.382
8470,2025-08-27,3868.382,3869.612,3887.198,3800.350,3800.350
8471,2025-08-28,3800.35,3796.711,3845.087,3761.422,3843.597


In [64]:
# Use help() to view the detailed documentation of the sort_values method.
# help() displays the docstring of any Python object, function, or method,
# which is useful for beginners learning a function's parameters and usage.
help(data.sort_values)


Help on method sort_values in module pandas.core.frame:

sort_values(by: 'IndexLabel', *, axis: 'Axis' = 0, ascending: 'bool | list[bool] | tuple[bool, ...]' = True, inplace: 'bool' = False, kind: 'SortKind' = 'quicksort', na_position: 'str' = 'last', ignore_index: 'bool' = False, key: 'ValueKeyFunc | None' = None) -> 'DataFrame | None' method of pandas.core.frame.DataFrame instance
    Sort by the values along either axis.

    Parameters
    ----------
    by : str or list of str
        Name or list of names to sort by.

        - if `axis` is 0 or `'index'` then `by` may contain index
          levels and/or column labels.
        - if `axis` is 1 or `'columns'` then `by` may contain column
          levels and/or index labels.
    axis : "{0 or 'index', 1 or 'columns'}", default 0
         Axis to be sorted.
    ascending : bool or list of bool, default True
         Sort ascending vs. descending. Specify list for multiple sort
         orders.  If this is a list of bools, must ma

In [65]:
# set_index() turns the 'Day' column into the DataFrame's index so rows can be
# looked up directly by date.
# The result is assigned back instead of using inplace=True, and the guard
# skips the step when 'Day' is already the index, so re-running this cell no
# longer raises KeyError for the missing 'Day' column.
if data.index.name != 'Day':
    data = data.set_index('Day')
# Slice rows by date strings on the index; label slices include both endpoints.
data.loc['2024-9-5':'2025-08-29']

Unnamed: 0_level_0,Preclose,Open,Highest,Lowest,Close
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-09-05,2784.278,2785.264,2796.019,2777.471,2788.314
2024-09-06,2788.314,2791.765,2804.093,2765.639,2765.807
2024-09-09,2765.807,2754.724,2756.556,2726.967,2736.488
2024-09-10,2736.488,2737.881,2750.118,2718.627,2744.192
2024-09-11,2744.192,2732.731,2732.731,2710.621,2721.795
...,...,...,...,...,...
2025-08-25,3825.759,3848.163,3883.562,3839.972,3883.562
2025-08-26,3883.562,3871.471,3888.599,3859.758,3868.382
2025-08-27,3868.382,3869.612,3887.198,3800.350,3800.350
2025-08-28,3800.35,3796.711,3845.087,3761.422,3843.597
