In [None]:
import pandas as pd
import matplotlib.pyplot as plt
# Global matplotlib settings applied to every figure drawn below.
# NOTE(review): 'Microsoft YaHei' is presumably chosen so the Chinese axis labels
# and titles render; it must be installed on the machine running the notebook.
plt.rcParams['font.family'] = 'Microsoft YaHei'
plt.rcParams['axes.unicode_minus'] = False   # display the minus sign correctly
import pandas as pd
import plotly.graph_objects as go
import re

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Load the CSV file
df = pd.read_csv("24bit/75酒精.csv", sep=',')

# Names of the columns to draw (example selection)
selected_columns = ["vol24_5", "vol24_10"]

voltage_fig, voltage_ax = plt.subplots(figsize=(12, 7))

for col in selected_columns:
    # Warn about and skip any requested column the file does not contain.
    if col not in df.columns:
        print(f"警告：列 {col} 不存在于数据中")
        continue
    # x axis is the row number, y axis the values of the column
    voltage_ax.plot(df.index, df[col].values, label=col)

voltage_ax.set_xlabel("样本序号")
voltage_ax.set_ylabel("电压值")
voltage_ax.set_title("指定列的电压曲线")
voltage_ax.grid(True)
voltage_ax.legend(loc='best')
voltage_fig.tight_layout()
plt.show()


In [None]:
target_length = 661

def preprocess(df):
    """Trim a frame to ``target_length`` rows and keep only complete columns.

    Parameters:
    df: pandas DataFrame to clean

    Returns:
    A new DataFrame holding the first ``target_length`` rows of ``df``,
    without any column that has a missing value inside those rows, and with
    a fresh 0-based index. The result has zero columns when no column
    qualifies.
    """
    # Rows beyond target_length are discarded before looking for gaps, so a
    # NaN that only occurs further down does not disqualify a column.
    leading_rows = df.head(target_length)
    complete_columns = leading_rows.dropna(axis=1, how="any")
    return complete_columns.reset_index(drop=True)

In [None]:
def _load_sample(csv_path):
    """Read one measurement CSV and pass it through ``preprocess``."""
    return preprocess(pd.read_csv(csv_path))

# One preprocessed frame per measured sample.
df_75酒精 = _load_sample("24bit/75酒精.csv")
df_C2H4O2 = _load_sample("24bit/C2H4O2.csv")
df_C2H6O = _load_sample("24bit/C2H6O.csv")
df_水 = _load_sample("24bit/水.csv")
df_玉米油 = _load_sample("24bit/玉米油.csv")
df_空 = _load_sample("24bit/空.csv")
df_空瓶 = _load_sample("24bit/空瓶.csv")
df_糖水 = _load_sample("24bit/糖水.csv")
df_芝麻油 = _load_sample("24bit/芝麻油.csv")

In [None]:
# Display the preprocessed frame loaded from 24bit/水.csv as a visual sanity check.
df_水

In [None]:
# Matches column names of the form vol24_<digits>
pattern = re.compile(r"vol24_\d+")

# Create the figure object that add_traces() below appends to
fig = go.Figure()

def add_traces(df, name_prefix, color):
    """Append one line trace per ``vol24_<n>`` column of ``df`` to the figure.

    Uses the module-level ``fig`` (target figure) and ``pattern`` (column-name
    filter). Columns whose whole name does not match ``pattern`` are ignored.

    Parameters:
    df: DataFrame whose matching columns are drawn against its index
    name_prefix: text placed before the column name in the legend entry
    color: line colour shared by all traces of this frame
    """
    matching_columns = (name for name in df.columns if pattern.fullmatch(name))
    for column in matching_columns:
        trace = go.Scatter(
            x=df.index,
            y=df[column],
            mode="lines",
            name=f"{name_prefix} - {column}",
            line={"color": color, "width": 2},
            # Hidden at first; the trace is shown once toggled in the legend.
            visible="legendonly",
        )
        fig.add_trace(trace)

# Add every data set; all traces start hidden (see add_traces)
for sample_frame, sample_name, sample_color in (
    (df_75酒精, "75酒精", "blue"),
    (df_C2H4O2, "C2H4O2", "orange"),
    (df_C2H6O, "C2H6O", "green"),
    (df_水, "水", "red"),
    (df_玉米油, "玉米油", "purple"),
    (df_空, "空", "black"),
    (df_空瓶, "空瓶", "yellow"),
    (df_糖水, "糖水", "cyan"),
    (df_芝麻油, "芝麻油", "brown"),
):
    add_traces(sample_frame, sample_name, sample_color)

# Configure the layout
layout_options = {
    "title": "vol24 组电压曲线对比",
    "xaxis_title": "样本序号",
    "yaxis_title": "电压值",
    "legend": {"orientation": "h"},
    "template": "simple_white",
}
fig.update_layout(**layout_options)

# Render the figure
fig.show()


In [None]:
import numpy as np
import plotly.graph_objects as go
import re

# Rebinds the module-level `pattern` and `fig` used by the helpers in this cell:
# the same vol24_<digits> column filter, and a fresh, empty figure.
pattern = re.compile(r"vol24_\d+")
fig = go.Figure()

def fft_lowpass_filter(signal, cutoff_ratio=0.1):
    """Low-pass filter a 1-D signal by zeroing its high-frequency FFT bins.

    Parameters:
    signal: 1-D array-like of samples
    cutoff_ratio: fraction of the spectrum to keep. ``int(n * cutoff_ratio)``
                  (clamped to ``0..n``) is the number of lowest frequency bins
                  retained, counting the DC bin; e.g. 0.1 keeps the lowest
                  10% of the bins.

    Returns:
    numpy array of the same length as ``signal`` holding the real part of the
    inverse FFT of the masked spectrum. When the bin count works out to 0
    nothing is kept and the result is all zeros.
    """
    spectrum = np.fft.fft(signal)
    n = len(spectrum)
    cutoff = min(max(int(n * cutoff_ratio), 0), n)

    # np.fft.fft stores frequency +k at index k and frequency -k at index n-k.
    # Keep bins 0..cutoff-1 together with exactly their mirror images
    # -(cutoff-1)..-1, so every retained component keeps both halves of its
    # conjugate pair. Slicing with ``-cutoff`` instead would keep one extra
    # unpaired negative bin and, for cutoff == 0, the entire spectrum.
    keep = np.zeros(n, dtype=bool)
    keep[:cutoff] = True
    if cutoff > 1:
        keep[n - cutoff + 1:] = True

    # With a symmetric mask the imaginary part is only rounding noise.
    return np.fft.ifft(spectrum * keep).real

def add_filtered_traces(df, name_prefix, color):
    """Append the FFT-filtered version of each ``vol24_<n>`` column to the figure.

    Every column of ``df`` whose full name matches the module-level ``pattern``
    is passed through ``fft_lowpass_filter`` with ``cutoff_ratio=0.1`` and
    added to the module-level ``fig`` as a dashed line that is initially
    shown only in the legend.

    Parameters:
    df: DataFrame providing the signals and the x values (its index)
    name_prefix: text placed before the column name in the legend entry
    color: line colour shared by all traces of this frame
    """
    for column in df.columns:
        if not pattern.fullmatch(column):
            continue
        smoothed = fft_lowpass_filter(df[column].values, cutoff_ratio=0.1)
        dashed_line = {"color": color, "width": 2, "dash": 'dash'}
        trace = go.Scatter(
            x=df.index,
            y=smoothed,
            mode="lines",
            name=f"{name_prefix} - {column} (FFT滤波)",
            line=dashed_line,
            visible="legendonly",
        )
        fig.add_trace(trace)

# Raw (unfiltered) signals, hidden until toggled in the legend
def add_raw_traces(df, name_prefix, color):
    """Append each ``vol24_<n>`` column of ``df`` to the figure unchanged.

    Columns are selected with the module-level ``pattern`` and drawn on the
    module-level ``fig`` as thin solid lines that are initially shown only in
    the legend.

    Parameters:
    df: DataFrame providing the signals and the x values (its index)
    name_prefix: text placed before the column name in the legend entry
    color: line colour shared by all traces of this frame
    """
    selected = [name for name in df.columns if pattern.fullmatch(name)]
    for column in selected:
        thin_line = {"color": color, "width": 1}
        fig.add_trace(
            go.Scatter(
                x=df.index,
                y=df[column],
                mode="lines",
                name=f"{name_prefix} - {column} (原始)",
                line=thin_line,
                visible="legendonly",
            )
        )

# (frame, legend label, line colour) for every sample. The raw and the
# filtered curves of a sample share the same label and colour, so both passes
# below read from this single table.
sample_series = [
    (df_75酒精, "75酒精", "black"),
    (df_C2H4O2, "C2H4O2", "blue"),
    (df_C2H6O, "C2H6O", "orange"),
    (df_水, "水", "green"),
    (df_玉米油, "玉米油", "red"),  # label was misspelled "玉米与"
    (df_空, "空", "purple"),
    (df_空瓶, "空瓶", "yellow"),
    (df_糖水, "糖水", "cyan"),
    (df_芝麻油, "芝麻油", "brown"),
]

# Draw all raw curves first ...
for sample_frame, sample_name, sample_color in sample_series:
    add_raw_traces(sample_frame, sample_name, sample_color)

# ... then all filtered curves, so the legend lists raw entries before filtered ones
for sample_frame, sample_name, sample_color in sample_series:
    add_filtered_traces(sample_frame, sample_name, sample_color)

fig.update_layout(
    title="vol24 组电压曲线对比（含FFT滤波）",
    xaxis_title="样本序号",
    yaxis_title="电压值",
    legend=dict(orientation="h"),
    template="simple_white"
)

fig.show()


In [None]:
import numpy as np

def extract_features(signal, n_freq=10):
    """Build a fixed-length, L2-normalised feature vector from a 1-D signal.

    The vector is the magnitude of the first ``n_freq`` FFT bins followed by
    the mean, standard deviation, maximum and minimum of the signal.

    Parameters:
    signal: 1-D array-like of samples (must not be empty)
    n_freq: number of leading FFT magnitude bins to use (default 10)

    Returns:
    numpy array of length ``n_freq + 4``. It is scaled to unit Euclidean norm
    unless its norm is 0, in which case it is returned unscaled.

    Raises:
    ValueError: if ``n_freq`` is negative
    """
    if n_freq < 0:
        raise ValueError(f"n_freq must be non-negative, got {n_freq}")

    fft_magnitude = np.abs(np.fft.fft(signal))

    # A signal shorter than n_freq has fewer bins; zero-pad so every signal
    # yields a vector of the same length and rows can be stacked into a matrix.
    freq_features = fft_magnitude[:n_freq]
    missing = n_freq - freq_features.size
    if missing > 0:
        freq_features = np.pad(freq_features, (0, missing))

    stats = [np.mean(signal), np.std(signal), np.max(signal), np.min(signal)]
    features = np.concatenate([freq_features, stats])

    norm = np.linalg.norm(features)
    if norm != 0:  # avoid dividing an all-zero vector by zero
        features = features / norm

    return features

def extract_feature_matrix(df, pattern_str=r"vol24_\d+"):
    """Stack the feature vectors of all matching columns of ``df``.

    Parameters:
    df: DataFrame whose columns hold one signal each
    pattern_str: regular expression a column name must match in full to be
                 included

    Returns:
    numpy array with one row per matching column (in column order), each row
    being ``extract_features`` of that column's values. The array is empty
    when no column matches.
    """
    column_re = re.compile(pattern_str)
    rows = [
        extract_features(df[name].values)
        for name in df.columns
        if column_re.fullmatch(name)
    ]
    return np.array(rows)

In [None]:
# One feature matrix per sample, computed in the order the frames are listed.
(
    feature_matrix_75酒精,
    feature_matrix_C2H4O2,
    feature_matrix_C2H6O,
    feature_matrix_水,
    feature_matrix_玉米油,
    feature_matrix_空,
    feature_matrix_空瓶,
    feature_matrix_糖水,
    feature_matrix_芝麻油,
) = [
    extract_feature_matrix(sample_frame)
    for sample_frame in (
        df_75酒精,
        df_C2H4O2,
        df_C2H6O,
        df_水,
        df_玉米油,
        df_空,
        df_空瓶,
        df_糖水,
        df_芝麻油,
    )
]

In [None]:
# Sanity check: print the dimensions of one of the feature matrices.
print(feature_matrix_空.shape)

In [None]:
def clean_nan(mat):
    """Return the rows of the 2-D array ``mat`` that contain no NaN value."""
    row_has_nan = np.isnan(mat).any(axis=1)
    return mat[np.logical_not(row_has_nan)]

# Replace every feature matrix by its NaN-free rows.
(
    feature_matrix_75酒精,
    feature_matrix_C2H4O2,
    feature_matrix_C2H6O,
    feature_matrix_水,
    feature_matrix_玉米油,
    feature_matrix_空,
    feature_matrix_空瓶,
    feature_matrix_糖水,
    feature_matrix_芝麻油,
) = [
    clean_nan(matrix)
    for matrix in (
        feature_matrix_75酒精,
        feature_matrix_C2H4O2,
        feature_matrix_C2H6O,
        feature_matrix_水,
        feature_matrix_玉米油,
        feature_matrix_空,
        feature_matrix_空瓶,
        feature_matrix_糖水,
        feature_matrix_芝麻油,
    )
]


In [None]:
import plotly.express as px
from sklearn.decomposition import PCA
import numpy as np

# Each class label paired with its feature matrix, in stacking order
labelled_matrices = [
    ("75酒精", feature_matrix_75酒精),
    ("C2H4O2", feature_matrix_C2H4O2),
    ("C2H6O", feature_matrix_C2H6O),
    ("水", feature_matrix_水),
    ("玉米油", feature_matrix_玉米油),
    ("空", feature_matrix_空),
    ("空瓶", feature_matrix_空瓶),
    ("糖水", feature_matrix_糖水),
    ("芝麻油", feature_matrix_芝麻油),
]

# Stack all feature matrices into a single sample matrix
X = np.vstack([matrix for _, matrix in labelled_matrices])

# One label per row of X, in the same order as the stacked rows
labels = [
    class_name
    for class_name, matrix in labelled_matrices
    for _ in range(len(matrix))
]

# Reduce to 3 dimensions with PCA
pca = PCA(n_components=3)
X_pca = pca.fit_transform(X)

# Assemble the frame used for plotting: three components plus the class label
df_pca = pd.DataFrame(X_pca[:, :3], columns=['PC1', 'PC2', 'PC3'])
df_pca['label'] = labels

# Draw the 3-D scatter plot with plotly, one colour per class
fig = px.scatter_3d(
    df_pca,
    x='PC1',
    y='PC2',
    z='PC3',
    color='label',
    title="PCA 三维降维结果",
    labels={"PC1": "主成分1", "PC2": "主成分2", "PC3": "主成分3"},
    width=800,
    height=800,
)

fig.show()


In [None]:
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
import joblib
import pandas as pd

# Build the KNN classifier and fit it on the full feature matrix in one step.
# n_neighbors=9 is a tunable choice.
knn = KNeighborsClassifier(n_neighbors=9).fit(X, labels)

# Persist the fitted model to disk
joblib.dump(knn, 'knn_model_0708.pkl')

print("KNN model trained and saved as 'knn_model_0708.pkl'")

In [None]:

def extract_label_data(X, target_label, sample_labels=None):
    """
    Extract the rows of X that belong to the specified label.

    Rows are selected through the per-row label list rather than hard-coded
    index ranges, so the result stays correct whatever order the per-class
    feature matrices were stacked in and whichever classes are present.

    Parameters:
    X: numpy array, combined feature matrix (one row per sample)
    target_label: str, the label to extract data for (e.g., '水', '芝麻油', etc.)
    sample_labels: optional sequence with one label per row of X. Defaults to
                   the module-level `labels` list that is built together with X.

    Returns:
    numpy array, the rows of X whose label equals target_label

    Raises:
    ValueError: if the number of labels differs from the number of rows of X,
                or if target_label does not occur among the labels (which also
                covers a class that has no rows).
    """
    if sample_labels is None:
        # Fall back to the label list constructed alongside X.
        sample_labels = labels

    if len(sample_labels) != len(X):
        raise ValueError(
            f"Label count ({len(sample_labels)}) does not match "
            f"number of rows in X ({len(X)})"
        )

    # Distinct labels in first-seen order, used for validation and the message.
    known_labels = list(dict.fromkeys(sample_labels))
    if target_label not in known_labels:
        raise ValueError(f"Invalid label: {target_label}. Must be one of {known_labels}")

    # Boolean row mask; also works when a label's rows are not contiguous.
    row_mask = np.asarray(sample_labels) == target_label
    return np.asarray(X)[row_mask]

In [None]:
import numpy as np
import joblib

# Load the trained model saved by the training cell.
# NOTE(review): joblib.load unpickles the file, and unpickling can execute
# arbitrary code; only load model files that come from a trusted source.
knn = joblib.load('knn_model_0708.pkl')

# Prediction helper for new samples
def predict_samples(new_samples):
    """
    Classify feature vectors with the module-level KNN model `knn`.

    Parameters:
    new_samples: array-like of shape (n_features,) for one sample, or
                 (n_samples, n_features) for several samples

    Returns:
    The result of knn.predict: one predicted label per sample

    Raises:
    ValueError: if the input is neither 1D nor 2D
    """
    batch = np.asarray(new_samples)

    # A single feature vector becomes a one-row matrix.
    if batch.ndim == 1:
        batch = batch.reshape(1, -1)

    # Anything that is still not a matrix (0-D, 3-D, ...) is rejected.
    if batch.ndim != 2:
        raise ValueError("Input must be a 1D or 2D numpy array")

    return knn.predict(batch)

print(X.shape)
# Example usage
if __name__ == "__main__":
    # Take every row of the training matrix X labelled '空' as demo input.
    # These rows were part of the data the model was fitted on, so this is a
    # smoke test of the prediction path, not an accuracy evaluation.
    sample_data = extract_label_data(X, '空')
    print("Sample data:", sample_data)

    # Run the classifier on the extracted rows
    predictions = predict_samples(sample_data)

    print("Predictions for new samples:")
    for sample_number, predicted_label in enumerate(predictions, start=1):
        print(f"Sample {sample_number}: {predicted_label}")