In [19]:
import pandas as pd
import numpy as np

In [20]:
# Deliberately failing attempt: read_csv is called with its defaults
# (comma separator, first line treated as the header). On this file the C
# tokenizer raises ParserError -- presumably because the file does not start
# with the tabular data; confirm by opening the file.
try:
    df_wrong = pd.read_csv('my_cv_data.txt')
    print(df_wrong)
except pd.errors.ParserError as exc:
    # Display the error as the lesson instead of letting it abort a
    # "Restart Kernel -> Run All" of the notebook.
    print(f"ParserError: {exc}")

ParserError: Error tokenizing data. C error: Expected 1 fields in line 4, saw 3


In [None]:
# Correct way to read the file
file_path = 'my_cv_data.txt'

df = pd.read_csv(
    file_path,
    sep='\t',       # sep = separator; fields are split on tab characters
    skiprows=7    # an integer skips that many leading lines: the first 7 (indices 0-6), so line index 7 is read as the header
)

# Print the first 5 rows of the DataFrame
print(df.head())

   Potential(V) Current(A)  Cycle
0           0.0      0.001      1
1           0.1      0.003      1
2           0.2      0.005      1
3           0.3      0.007      1
4           0.4      ERROR      1


In [21]:
# Report each column's dtype and non-null count to spot parsing problems.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Potential(V)  20 non-null     float64
 1   Current(A)    19 non-null     object 
 2   Cycle         20 non-null     int64  
dtypes: float64(1), int64(1), object(1)
memory usage: 608.0+ bytes


In [None]:
# Summary statistics; by default describe() covers only numeric columns.
# NOTE(review): the saved output lists Current(A) with count 18, but at this
# point in the notebook that column is still dtype object. The output appears
# to come from a run made after the pd.to_numeric conversion in the following
# cell (out-of-order execution) -- re-run top to bottom to refresh it.
df.describe()

Unnamed: 0,Potential(V),Current(A),Cycle
count,20.0,18.0,20.0
mean,0.25,0.003444,1.5
std,0.153897,0.002791,0.512989
min,0.0,-0.001,1.0
25%,0.1,0.00125,1.0
50%,0.25,0.0035,1.5
75%,0.4,0.00575,2.0
max,0.5,0.008,2.0


In [22]:
# Parse the Current(A) column as numbers; with errors='coerce' any entry that
# cannot be parsed is replaced by NaN instead of raising.
current_numeric = pd.to_numeric(df['Current(A)'], errors='coerce')
df['Current(A)'] = current_numeric

# Inspect dtypes and non-null counts again after the conversion.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Potential(V)  20 non-null     float64
 1   Current(A)    18 non-null     float64
 2   Cycle         20 non-null     int64  
dtypes: float64(2), int64(1)
memory usage: 608.0 bytes


In [23]:
# dropna = drop Not Available (alternative: remove every row that contains NaN)
# df_cleaned = df.dropna()
# print(df_cleaned.info()) # only 18 rows would remain

In [24]:
# fillna = "fill Not Available": every NaN in the frame is replaced by 0.0.
# The result is bound to a new name, so df itself keeps its NaN values.
df_cleaned = df.fillna(value=0.0)

# Check again: all columns should now report the full non-null count.
df_cleaned.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Potential(V)  20 non-null     float64
 1   Current(A)    20 non-null     float64
 2   Cycle         20 non-null     int64  
dtypes: float64(2), int64(1)
memory usage: 608.0 bytes


In [25]:
# Map the original headers to shorter names without the unit suffix.
column_renames = {'Potential(V)': 'Voltage', 'Current(A)': 'Current'}
df_cleaned = df_cleaned.rename(columns=column_renames)

# Show the first rows to confirm the new headers.
print(df_cleaned.head())

   Voltage  Current  Cycle
0      0.0    0.001      1
1      0.1    0.003      1
2      0.2    0.005      1
3      0.3    0.007      1
4      0.4    0.000      1


In [26]:
# 1. Select a single column (returns a Series object)
voltages = df_cleaned['Voltage']
# print(voltages)

# 2. Select several columns (note the double brackets [[]]: a list of names inside the indexer)
v_i_data = df_cleaned[['Voltage', 'Current']]
# print(v_i_data.head())

In [27]:
# 1. Build a "mask": a True/False answer for every row (is Cycle equal to 1?)
mask_cycle_1 = (df_cleaned['Cycle'] == 1)
# print(mask_cycle_1)

# 2. Index the DataFrame with the mask; pandas keeps only the rows where the mask is True
cycle_1_data = df_cleaned[mask_cycle_1]

print(cycle_1_data)

   Voltage  Current  Cycle
0      0.0    0.001      1
1      0.1    0.003      1
2      0.2    0.005      1
3      0.3    0.007      1
4      0.4    0.000      1
5      0.5    0.006      1
6      0.4    0.004      1
7      0.3    0.002      1
8      0.2    0.000      1
9      0.1   -0.001      1


In [28]:
# Two row-wise conditions, each a boolean Series aligned with df_cleaned.
in_first_cycle = df_cleaned['Cycle'] == 1
above_threshold = df_cleaned['Voltage'] > 0.3

# Element-wise AND: a row is kept only when both conditions hold.
mask_adv = in_first_cycle & above_threshold

adv_data = df_cleaned[mask_adv]

print(adv_data)

   Voltage  Current  Cycle
4      0.4    0.000      1
5      0.5    0.006      1
6      0.4    0.004      1
