In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import signal
from scipy.fft import fft, fftfreq
import librosa
import librosa.display
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tqdm.auto import tqdm
import os
import warnings
warnings.filterwarnings('ignore')

# matplotlib setup for Hebrew text: an ordered font fallback list, plus the
# plain ASCII hyphen for negative tick labels instead of the Unicode minus sign.
plt.rcParams.update({
    'font.family': ['Arial Unicode MS', 'Tahoma', 'DejaVu Sans'],
    'axes.unicode_minus': False,
})


# Relative path of the data folder (presumably the folder handed to the
# loader functions - confirm against the calling cells).
DATA_PATH = "data/"


def load_and_prepare_data(data_path_folder):
    """Read the known CSV recordings and return their samples with labels.

    Four fixed file names are looked up inside ``data_path_folder``. For every
    file that exists and parses, the values of its first column (the file is
    read without a header row) are appended to one flat sample sequence, and a
    matching integer label is appended for each value
    (quiet -> 0, vehicle -> 1, human -> 2).

    Args:
        data_path_folder: Directory expected to contain the CSV files.

    Returns:
        A ``(samples, labels)`` tuple of 1-D numpy arrays of equal length.
        Both arrays are empty when the folder is missing or nothing loaded.
        Missing, empty, or unreadable files are reported with ``print`` and
        skipped; no exception is propagated for them.
    """
    # File name -> activity name; dict order defines the concatenation order.
    sources = {
        'car_nothing.csv': 'quiet',
        'carnew.csv': 'vehicle',
        'human_nothing.csv': 'quiet',
        'human.csv': 'human'
    }
    # Activity name -> integer class code.
    class_codes = {'quiet': 0, 'vehicle': 1, 'human': 2}

    samples, targets = [], []
    print("Starting data loading...")

    if not os.path.exists(data_path_folder):
        print(f"Data folder {data_path_folder} not found. Please create it and add data files.")
        return np.array([]), np.array([])

    for csv_name, activity in sources.items():
        csv_path = os.path.join(data_path_folder, csv_name)
        if not os.path.exists(csv_path):
            print(f"Error: File not found at {csv_path}. Skipping.")
            continue

        try:
            frame = pd.read_csv(csv_path, header=None)
            if frame.empty or frame.shape[1] <= 0:
                print(f"Warning: File {csv_name} is empty or has no data columns. Skipping.")
                continue
            # Only the first column is used as the signal.
            column = frame.iloc[:, 0].values
            code = class_codes[activity]
            samples.extend(column)
            targets.extend([code] * len(column))
        except Exception as e:
            # Best effort: report the failing file and carry on with the rest.
            print(f"Error reading {csv_name}: {e}")

    all_data_np = np.array(samples)
    all_labels_np = np.array(targets)

    if len(all_data_np) == 0:
        print("No data was loaded.")
    else:
        print(f"Total data points loaded: {len(all_data_np)}")

    return all_data_np, all_labels_np

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def prepare_raw_data(data_dict, labels_dict, window_size_seconds=2, sample_rate=1000):
    """Cut every recording into fixed-length raw windows with 50% overlap.

    Args:
        data_dict: Mapping of recording name -> array exposing ``.shape`` and
            slicing; windows are cut along axis 0.
        labels_dict: Mapping of the same recording names -> label.
        window_size_seconds: Window duration in seconds.
        sample_rate: Samples per second, used to convert the duration into a
            number of samples.

    Returns:
        ``(windows, labels)`` as numpy arrays with one label per window.
        Recordings shorter than one window contribute nothing.

    Raises:
        ValueError: If the window length works out to less than one sample.
    """
    # Cast to int so fractional durations (e.g. 0.5 s) still give a usable
    # slice length and range() step.
    window_size = int(round(window_size_seconds * sample_rate))
    if window_size < 1:
        raise ValueError(
            "window_size_seconds * sample_rate must be at least 1 sample, "
            f"got {window_size}"
        )

    # 50% overlap; never let the step reach 0 (happens for a 1-sample window).
    step_size = max(1, window_size // 2)

    # Exact number of windows per recording is floor((n - w) / step) + 1, and
    # 0 when the recording is shorter than a window. The previous estimate
    # omitted the "+ 1" (and went negative for short recordings), and the loop
    # stopped once the progress bar hit that underestimate - silently dropping
    # windows, most of them from every recording after the first.
    total_windows = 0
    for signal_data in data_dict.values():
        n_samples = signal_data.shape[0]
        if n_samples >= window_size:
            total_windows += (n_samples - window_size) // step_size + 1

    print(f"🔄 יצירת {total_windows} חלונות של {window_size_seconds} שניות...")

    raw_windows = []
    labels = []

    with tqdm(total=total_windows, desc="עיבוד חלונות גולמיים") as pbar:
        for filename, signal_data in data_dict.items():
            label = labels_dict[filename]
            last_start = signal_data.shape[0] - window_size

            # range() is empty when last_start < 0, so short recordings are
            # skipped; every generated slice is exactly window_size long.
            for start_idx in range(0, last_start + 1, step_size):
                raw_windows.append(signal_data[start_idx:start_idx + window_size])
                labels.append(label)
                pbar.update(1)

    return np.array(raw_windows), np.array(labels)


In [4]:
# 3. Simple CNN model for raw data
def create_raw_cnn_model(input_shape, num_classes):
    """Build a 2D CNN classifier for raw windows (2D input: time x sensors).

    Args:
        input_shape: Tuple describing a single window; a trailing channel
            axis of size 1 is appended to it inside the model.
        num_classes: Width of the final softmax layer.

    Returns:
        An uncompiled ``keras.Sequential`` model.
    """
    stack = [
        layers.Input(shape=input_shape),
        # Conv2D needs a channel axis, so add one of size 1.
        layers.Reshape(input_shape + (1,)),
    ]

    # Three conv stages looking at patterns over time and across sensors;
    # pooling uses a (2, 1) window, so only the first axis is halved.
    conv_stages = ((16, (5, 3)), (32, (5, 3)), (64, (3, 2)))
    for n_filters, kernel in conv_stages:
        stack.append(layers.Conv2D(n_filters, kernel, activation='relu', padding='same'))
        stack.append(layers.MaxPooling2D((2, 1)))
        stack.append(layers.BatchNormalization())

    # Last conv stage is collapsed by global average pooling instead.
    stack.append(layers.Conv2D(128, (3, 2), activation='relu', padding='same'))
    stack.append(layers.GlobalAveragePooling2D())

    # Dense head with dropout after each hidden layer.
    for units, drop_rate in ((256, 0.5), (128, 0.3)):
        stack.append(layers.Dense(units, activation='relu'))
        stack.append(layers.Dropout(drop_rate))

    # Output layer.
    stack.append(layers.Dense(num_classes, activation='softmax'))

    return keras.Sequential(stack)

In [7]:
def create_raw_1d_cnn_model(input_shape, num_classes):
    """Build a 1D CNN that processes each sensor separately, then merges them.

    Args:
        input_shape: Shape of one window; ``input_shape[1]`` is taken as the
            number of sensors and one convolutional branch is built per sensor.
        num_classes: Width of the final softmax layer.

    Returns:
        An uncompiled functional ``keras.Model``.
    """
    def sensor_branch(channel, source):
        # Slice out a single sensor while keeping a last axis of size 1.
        # ``idx`` is bound as a default argument so every Lambda keeps its
        # own channel index rather than the loop's final value.
        h = layers.Lambda(lambda x, idx=channel: x[:, :, idx:idx+1])(source)

        # Two conv -> pool -> batch-norm stages.
        for n_filters, width in ((32, 11), (64, 7)):
            h = layers.Conv1D(n_filters, width, activation='relu', padding='same')(h)
            h = layers.MaxPooling1D(2)(h)
            h = layers.BatchNormalization()(h)

        # Final conv stage, reduced by global average pooling.
        h = layers.Conv1D(128, 5, activation='relu', padding='same')(h)
        return layers.GlobalAveragePooling1D()(h)

    input_layer = layers.Input(shape=input_shape)

    # One independent branch per sensor.
    sensor_outputs = [
        sensor_branch(channel, input_layer) for channel in range(input_shape[1])
    ]

    # Merge the branches; a single branch is used as-is.
    combined = (
        layers.Concatenate()(sensor_outputs)
        if len(sensor_outputs) > 1
        else sensor_outputs[0]
    )

    # Dense head with dropout after each hidden layer.
    hidden = combined
    for units, drop_rate in ((256, 0.5), (128, 0.3)):
        hidden = layers.Dense(units, activation='relu')(hidden)
        hidden = layers.Dropout(drop_rate)(hidden)

    # Output layer.
    output = layers.Dense(num_classes, activation='softmax')(hidden)

    return keras.Model(inputs=input_layer, outputs=output)

In [8]:
# 5. Training the raw-data model
def train_raw_model(model_type='2d'):
    """Train and evaluate a CNN on raw, windowed signals.

    Pipeline: load recordings -> cut 2-second windows -> encode labels ->
    stratified 80/20 train/test split -> normalise with training-set
    statistics -> build, compile and fit the model -> report test metrics.

    Args:
        model_type: ``'2d'`` builds the model from ``create_raw_cnn_model``;
            any other value builds the one from ``create_raw_1d_cnn_model``.

    Returns:
        ``None`` when no data files were found or no windows could be cut.
        Otherwise a dict with keys ``model``, ``history``, ``test_accuracy``,
        ``label_encoder``, ``predictions`` (a ``(y_true, y_pred)`` tuple of
        class indices), ``type`` and ``normalization`` (the ``mean`` / ``std``
        applied to the inputs, needed to preprocess new data the same way).
    """
    print("🔄 שלב 1: אימון על נתונים גולמיים")
    print("=" * 50)

    # Load data.
    # NOTE(review): load_geophone_data is not defined in the cells visible
    # here; it is expected to return (data_dict, labels_dict) keyed by
    # recording name - confirm it is defined before this cell runs.
    data_dict, labels_dict = load_geophone_data()

    if not data_dict:
        print("❌ לא נמצאו קבצי נתונים!")
        return None

    # Cut the recordings into raw windows.
    X_raw, y_raw = prepare_raw_data(data_dict, labels_dict, window_size_seconds=2)

    # Without this guard, X_raw[0] below raises IndexError when every
    # recording is shorter than one window.
    if len(X_raw) == 0:
        print("❌ No windows were produced - recordings are shorter than the window size.")
        return None

    print(f"✅ נוצרו {len(X_raw)} חלונות גולמיים")
    print(f"📊 צורת חלון: {X_raw[0].shape}")
    print(f"📈 טווח ערכים: {X_raw.min():.4f} עד {X_raw.max():.4f}")

    # Encode labels as integer ids, then one-hot for categorical cross-entropy.
    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(y_raw)
    y_categorical = tf.keras.utils.to_categorical(y_encoded)

    print(f"🏷️ קטגוריות: {label_encoder.classes_}")

    # Split BEFORE normalising so that test samples do not influence the
    # statistics the model is trained with.
    # NOTE(review): windows overlap by 50%, so a random split can still put
    # near-duplicate neighbouring windows on both sides; consider splitting
    # per recording if an unbiased test score matters.
    X_train, X_test, y_train, y_test = train_test_split(
        X_raw, y_categorical,
        test_size=0.2, random_state=42, stratify=y_encoded
    )

    # Normalise with training-set statistics only; fall back to a unit scale
    # for a constant signal to avoid dividing by zero.
    train_mean = X_train.mean()
    train_std = X_train.std()
    if train_std == 0:
        train_std = 1.0
    X_train = (X_train - train_mean) / train_std
    X_test = (X_test - train_mean) / train_std

    print(f"📈 אימון: {X_train.shape[0]} דגימות")
    print(f"📉 בדיקה: {X_test.shape[0]} דגימות")

    # Build the model.
    num_classes = len(label_encoder.classes_)
    print(f"\n🏗️ בניית מודל CNN {model_type.upper()}...")
    if model_type == '2d':
        model = create_raw_cnn_model(X_train[0].shape, num_classes)
    else:
        model = create_raw_1d_cnn_model(X_train[0].shape, num_classes)

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=0.001),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    print("📋 סיכום מודל נתונים גולמיים:")
    model.summary()

    # Stop early when the monitored metric stalls (restoring the best
    # weights) and halve the learning rate on shorter plateaus.
    callbacks = [
        keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True),
        keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=5)
    ]

    # Train; 20% of the training set is held out for validation.
    print(f"\n🎯 אימון מודל נתונים גולמיים...")
    history = model.fit(
        X_train, y_train,
        batch_size=32,
        epochs=50,
        validation_split=0.2,
        callbacks=callbacks,
        verbose=1
    )

    # Evaluate on the held-out test set.
    print("\n📊 הערכת מודל נתונים גולמיים...")
    test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
    print(f"🎯 דיוק מודל גולמי: {test_accuracy:.4f}")

    # Predictions as class indices.
    y_pred = model.predict(X_test, verbose=0)
    y_pred_classes = np.argmax(y_pred, axis=1)
    y_test_classes = np.argmax(y_test, axis=1)

    # classification_report measures len() of each target name, so numeric
    # class labels must be turned into strings; passing `labels` keeps the
    # names aligned even if a class is absent from the test split.
    class_names = [str(name) for name in label_encoder.classes_]
    print("\n📈 דוח ביצועים - מודל גולמי:")
    print(classification_report(
        y_test_classes, y_pred_classes,
        labels=np.arange(num_classes),
        target_names=class_names
    ))

    return {
        'model': model,
        'history': history,
        'test_accuracy': test_accuracy,
        'label_encoder': label_encoder,
        'predictions': (y_test_classes, y_pred_classes),
        'type': 'raw_' + model_type,
        'normalization': {'mean': float(train_mean), 'std': float(train_std)}
    }