In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively walk /kaggle/input and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
#1
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import LSTM, Dense, TimeDistributed, GlobalAveragePooling2D
from tensorflow.keras.models import Sequential
import shutil
from sklearn.model_selection import train_test_split

# Define input and output paths
# INPUT_PATH: dataset root; the code below reads its train/ and val/ sub-folders.
# OUTPUT_PATH: directory under which this notebook writes the artifacts it produces.
INPUT_PATH = '/kaggle/input/rwf2000/RWF-2000'
OUTPUT_PATH = '/kaggle/working'

def combine_datasets():
    """Merge the train/ and val/ folders of the dataset into one folder per class.

    Creates ``<OUTPUT_PATH>/combined_dataset/Fight`` and ``.../NonFight`` and
    copies every entry of the four source folders into the matching class
    folder with ``shutil.copy2``.

    Returns:
        str: path of the combined dataset directory.
    """
    combined_path = os.path.join(OUTPUT_PATH, 'combined_dataset')
    class_names = ('Fight', 'NonFight')

    # Both class folders must exist before anything is copied.
    for class_name in class_names:
        os.makedirs(os.path.join(combined_path, class_name), exist_ok=True)

    # Visiting order: Fight (train, val) first, then NonFight (train, val).
    for class_name in class_names:
        target_dir = os.path.join(combined_path, class_name)
        for split in ('train', 'val'):
            source_dir = os.path.join(INPUT_PATH, split, class_name)
            for entry in os.listdir(source_dir):
                shutil.copy2(
                    os.path.join(source_dir, entry),
                    os.path.join(target_dir, entry),
                )

    print(f"Combined dataset created at: {combined_path}")
    return combined_path

In [None]:
#2
def extract_frames(video_path, num_frames=30):
    """Sample ``num_frames`` evenly spaced frames from a video file.

    Each decoded frame is converted from OpenCV's BGR channel order to RGB
    (the order ImageNet-pretrained backbones are trained on), resized to
    224x224 and scaled to [0, 1] as float32 (half the memory of the float64
    that ``frame / 255.0`` would produce).

    Args:
        video_path: path of the video file to read.
        num_frames: number of frames to sample across the whole clip.

    Returns:
        np.ndarray of shape (num_frames, 224, 224, 3) and dtype float32, or
        None when the file cannot be opened, reports no frames, or fewer than
        ``num_frames`` frames could be decoded.
    """
    cap = cv2.VideoCapture(video_path)

    if not cap.isOpened():
        print(f"Error opening video file: {video_path}")
        return None

    frames = []
    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # A non-positive count would make linspace produce negative indices.
        if total_frames > 0:
            indices = np.linspace(0, total_frames - 1, num_frames, dtype=int)
            for idx in indices:
                cap.set(cv2.CAP_PROP_POS_FRAMES, int(idx))
                ret, frame = cap.read()
                if ret:
                    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    frame = cv2.resize(frame, (224, 224))
                    frames.append(frame.astype(np.float32) / 255.0)
    finally:
        # Release the capture even if decoding raised.
        cap.release()

    if len(frames) != num_frames:
        print(f"Warning: Could not extract {num_frames} frames from {video_path}")
        return None

    return np.array(frames, dtype=np.float32)

def prepare_dataset(combined_path, num_frames=30):
    """Load every video under ``combined_path`` into memory as frame stacks.

    Videos in ``Fight`` get label 1 and videos in ``NonFight`` get label 0.
    Videos for which ``extract_frames`` returns None are skipped.

    Args:
        combined_path: directory containing the ``Fight`` and ``NonFight`` folders.
        num_frames: number of frames requested from ``extract_frames`` per video.

    Returns:
        tuple: (array of frame stacks, array of labels), in processing order.
    """
    samples = []
    labels = []

    # (class folder, label, banner). The banners are kept verbatim, including
    # the leading newline on the second one.
    class_specs = (
        ('Fight', 1, "Processing Fight videos..."),
        ('NonFight', 0, "\nProcessing NonFight videos..."),
    )

    for class_name, label, banner in class_specs:
        class_dir = os.path.join(combined_path, class_name)
        print(banner)
        for position, video_name in enumerate(os.listdir(class_dir), start=1):
            clip = extract_frames(os.path.join(class_dir, video_name), num_frames)
            if clip is not None:
                samples.append(clip)
                labels.append(label)
            # Progress line every 100 videos, counting skipped ones too.
            if position % 100 == 0:
                print(f"Processed {position} {class_name} videos")

    return np.array(samples), np.array(labels)


In [None]:
#3
def create_model(num_frames=30):
    """Build the (uncompiled) frame-sequence classifier.

    A frozen ImageNet MobileNetV2 without its top is applied to every frame,
    the per-frame feature maps are average-pooled, and the resulting sequence
    goes through two LSTM layers and a dense head ending in one sigmoid unit.

    Args:
        num_frames: length of the frame sequence the model takes as input.

    Returns:
        The Sequential model; the caller compiles it.
    """
    frame_shape = (224, 224, 3)

    backbone = MobileNetV2(
        input_shape=frame_shape,
        include_top=False,
        weights='imagenet',
    )
    # Backbone weights are not updated during training.
    backbone.trainable = False

    layer_stack = [
        TimeDistributed(backbone, input_shape=(num_frames,) + frame_shape),
        TimeDistributed(GlobalAveragePooling2D()),
        LSTM(256, return_sequences=True),
        LSTM(128),
        Dense(64, activation='relu'),
        Dense(1, activation='sigmoid'),
    ]
    return Sequential(layer_stack)

In [None]:
#222222
def train_model():
    """Run the in-memory pipeline: build data, split 70/10/20, train, evaluate.

    Saves the preprocessed arrays, the best checkpoint (by validation
    accuracy) and the training history under ``OUTPUT_PATH``, then calls
    ``evaluate_model`` on the train, validation and test splits.

    Returns:
        tuple: (trained model, Keras History object).
    """
    # Hyper-parameters
    num_frames, batch_size, epochs = 30, 8, 50

    print("Combining datasets...")
    combined_path = combine_datasets()

    print("\nPreparing dataset...")
    X, y = prepare_dataset(combined_path, num_frames)

    # Keep the preprocessed arrays on disk.
    for file_name, array in (('X_preprocessed.npy', X), ('y_preprocessed.npy', y)):
        np.save(os.path.join(OUTPUT_PATH, file_name), array)
    print(f"\nPreprocessed data saved to {OUTPUT_PATH}")

    # Hold out 20% for testing, then take 12.5% of the remainder (10% of the
    # total) for validation, which leaves 70% for training.
    X_rest, X_test, y_rest, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )
    X_train, X_val, y_train, y_val = train_test_split(
        X_rest, y_rest, test_size=0.125, random_state=42, stratify=y_rest
    )

    total = len(X)
    print(f"\nData split sizes:")
    for split_label, split_data in (
        ("Training", X_train), ("Validation", X_val), ("Test", X_test)
    ):
        print(f"{split_label} samples: {len(split_data)} ({len(split_data)/total*100:.1f}%)")

    print("\nCreating and compiling model...")
    model = create_model(num_frames)
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    callbacks = [
        # Keep only the weights with the best validation accuracy.
        tf.keras.callbacks.ModelCheckpoint(
            os.path.join(OUTPUT_PATH, 'best_model.h5'),
            monitor='val_accuracy',
            save_best_only=True,
            mode='max',
        ),
        # Shrink the learning rate when validation loss stops improving.
        tf.keras.callbacks.ReduceLROnPlateau(
            monitor='val_loss', factor=0.2, patience=3, min_lr=1e-6
        ),
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss', patience=7, restore_best_weights=True
        ),
    ]

    print("\nTraining model...")
    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        batch_size=batch_size,
        epochs=epochs,
        callbacks=callbacks,
    )

    # history.history is a dict, so this file is stored as a pickled object array.
    np.save(os.path.join(OUTPUT_PATH, 'training_history.npy'), history.history)

    print("\nEvaluating model...")
    evaluate_model(model, X_train, y_train, X_val, y_val, X_test, y_test)

    return model, history

In [None]:
#2222222
def evaluate_model(model, X_train, y_train, X_val, y_val, X_test, y_test, history=None):
    """Report accuracy on all three splits and save test-set diagnostics.

    Writes ``classification_report.txt`` and ``confusion_matrix.npy`` under
    ``OUTPUT_PATH``. When ``history`` is given, also writes
    ``training_history.png`` with the accuracy and loss curves.

    Args:
        model: compiled Keras model exposing ``evaluate`` and ``predict``.
        X_train, y_train: training inputs and labels.
        X_val, y_val: validation inputs and labels.
        X_test, y_test: test inputs and labels.
        history: optional Keras ``History`` object (or its ``.history`` dict).
            The function previously read an undefined ``history`` name, which
            raised NameError; it is now an explicit argument and the plot is
            skipped when it is not supplied.

    Returns:
        None.
    """
    from sklearn.metrics import classification_report, confusion_matrix

    # Loss/accuracy per split; only accuracy is reported.
    train_loss, train_accuracy = model.evaluate(X_train, y_train)
    print(f"\nTraining accuracy: {train_accuracy*100:.2f}%")

    val_loss, val_accuracy = model.evaluate(X_val, y_val)
    print(f"Validation accuracy: {val_accuracy*100:.2f}%")

    test_loss, test_accuracy = model.evaluate(X_test, y_test)
    print(f"Test accuracy: {test_accuracy*100:.2f}%")

    # Threshold the sigmoid outputs at 0.5 and flatten (N, 1) -> (N,) so the
    # predictions have the same shape as the labels.
    predictions = (model.predict(X_test) > 0.5).astype(int).ravel()

    # Save and print classification report
    report = classification_report(y_test, predictions)
    with open(os.path.join(OUTPUT_PATH, 'classification_report.txt'), 'w') as f:
        f.write("Test Set Classification Report:\n")
        f.write(report)
        f.write(f"\nTraining Accuracy: {train_accuracy*100:.2f}%\n")
        f.write(f"Validation Accuracy: {val_accuracy*100:.2f}%\n")
        f.write(f"Test Accuracy: {test_accuracy*100:.2f}%\n")

    print("\nClassification Report:")
    print(report)

    # Save and print confusion matrix
    cm = confusion_matrix(y_test, predictions)
    np.save(os.path.join(OUTPUT_PATH, 'confusion_matrix.npy'), cm)
    print("\nConfusion Matrix:")
    print(cm)

    if history is None:
        print("\nNo training history supplied; skipping training-curve plot.")
        return

    # Accept either the History object or the plain dict it wraps.
    curves = getattr(history, 'history', history)

    import matplotlib.pyplot as plt

    plt.figure(figsize=(12, 4))

    # Plot accuracy
    plt.subplot(1, 2, 1)
    plt.plot(curves['accuracy'], label='Training Accuracy')
    plt.plot(curves['val_accuracy'], label='Validation Accuracy')
    plt.title('Model Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()

    # Plot loss
    plt.subplot(1, 2, 2)
    plt.plot(curves['loss'], label='Training Loss')
    plt.plot(curves['val_loss'], label='Validation Loss')
    plt.title('Model Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()

    plt.tight_layout()
    plt.savefig(os.path.join(OUTPUT_PATH, 'training_history.png'))
    plt.close()


In [None]:
#4
def train_model():
    """Run the in-memory pipeline: build data, split, train, evaluate on test.

    The data is split 70/10/20 into train/validation/test. The validation
    split drives early stopping and checkpoint selection; the test split is
    only used for the final ``evaluate_model`` call. (The test split was
    previously passed as ``validation_data``, so the reported test accuracy
    came from data that had already steered training.)

    Saves the preprocessed arrays, the best checkpoint and the training
    history under ``OUTPUT_PATH``.

    Returns:
        tuple: (trained model, Keras History object).
    """
    # Set parameters
    num_frames = 30
    batch_size = 8
    epochs = 20

    # Combine datasets
    print("Combining datasets...")
    combined_path = combine_datasets()

    # Prepare dataset
    print("\nPreparing dataset...")
    X, y = prepare_dataset(combined_path, num_frames)

    # Save preprocessed data
    np.save(os.path.join(OUTPUT_PATH, 'X_preprocessed.npy'), X)
    np.save(os.path.join(OUTPUT_PATH, 'y_preprocessed.npy'), y)
    print(f"\nPreprocessed data saved to {OUTPUT_PATH}")

    # Hold out 20% as the test set.
    X_temp, X_test, y_temp, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    # 12.5% of the remaining 80% is 10% of the total, leaving 70% for training.
    X_train, X_val, y_train, y_val = train_test_split(
        X_temp, y_temp, test_size=0.125, random_state=42, stratify=y_temp
    )

    # Create and compile model
    print("\nCreating and compiling model...")
    model = create_model(num_frames)
    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy']
    )

    # Keep only the weights with the best validation accuracy.
    checkpoint_path = os.path.join(OUTPUT_PATH, 'best_model.h5')
    checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
        checkpoint_path,
        monitor='val_accuracy',
        save_best_only=True,
        mode='max'
    )

    # Train model
    print("\nTraining model...")
    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        batch_size=batch_size,
        epochs=epochs,
        callbacks=[
            checkpoint_callback,
            tf.keras.callbacks.EarlyStopping(
                monitor='val_loss',
                patience=5,
                restore_best_weights=True
            )
        ]
    )

    # history.history is a dict, so this file is stored as a pickled object array.
    np.save(os.path.join(OUTPUT_PATH, 'training_history.npy'), history.history)

    # Evaluate on the test split, which training never saw.
    print("\nEvaluating model...")
    evaluate_model(model, X_test, y_test)

    return model, history


In [None]:
#5
def evaluate_model(model, X_test, y_test):
    """Print test accuracy and save a classification report and confusion matrix.

    Writes ``classification_report.txt`` and ``confusion_matrix.npy`` under
    ``OUTPUT_PATH``.

    Args:
        model: compiled Keras model exposing ``evaluate`` and ``predict``.
        X_test: test inputs.
        y_test: test labels.
    """
    # Overall loss / accuracy on the test data.
    _, test_acc = model.evaluate(X_test, y_test)
    print(f"Test accuracy: {test_acc*100:.2f}%")

    # Sigmoid outputs above 0.5 count as class 1.
    probabilities = model.predict(X_test)
    predicted_labels = (probabilities > 0.5).astype(int)

    from sklearn.metrics import classification_report, confusion_matrix

    # Classification report: write to disk, then echo.
    report_text = classification_report(y_test, predicted_labels)
    report_file = os.path.join(OUTPUT_PATH, 'classification_report.txt')
    with open(report_file, 'w') as handle:
        handle.write(report_text)
    print("\nClassification Report:")
    print(report_text)

    # Confusion matrix: write to disk, then echo.
    matrix = confusion_matrix(y_test, predicted_labels)
    np.save(os.path.join(OUTPUT_PATH, 'confusion_matrix.npy'), matrix)
    print("\nConfusion Matrix:")
    print(matrix)

In [None]:
#6
# Entry point: run train_model() and keep the returned model and history as
# notebook-level variables.
if __name__ == "__main__":
    print("Starting violence detection model training...")
    model, history = train_model()
    print("\nTraining completed. All results saved to:", OUTPUT_PATH)


In [None]:
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import LSTM, Dense, TimeDistributed, GlobalAveragePooling2D
from tensorflow.keras.models import Sequential
import shutil
from sklearn.model_selection import train_test_split

# Define input and output paths
# INPUT_PATH: dataset root; combine_datasets() reads its train/ and val/ sub-folders.
# OUTPUT_PATH: directory receiving the combined dataset, batch files, models and history.
INPUT_PATH = '/kaggle/input/rwf2000/RWF-2000'
OUTPUT_PATH = '/kaggle/working'

def combine_datasets():
    """Copy the train/ and val/ videos of both classes into one combined folder.

    The combined folder is ``<OUTPUT_PATH>/combined_dataset`` with one
    sub-folder per class (``Fight``, ``NonFight``). Files are copied with
    ``shutil.copy2``.

    Returns:
        str: path of the combined dataset directory.
    """
    combined_path = os.path.join(OUTPUT_PATH, 'combined_dataset')
    fight_dir = os.path.join(combined_path, 'Fight')
    nonfight_dir = os.path.join(combined_path, 'NonFight')
    os.makedirs(fight_dir, exist_ok=True)
    os.makedirs(nonfight_dir, exist_ok=True)

    # (source folder, destination folder) pairs, visited in this order.
    copy_plan = [
        (os.path.join(INPUT_PATH, 'train', 'Fight'), fight_dir),
        (os.path.join(INPUT_PATH, 'val', 'Fight'), fight_dir),
        (os.path.join(INPUT_PATH, 'train', 'NonFight'), nonfight_dir),
        (os.path.join(INPUT_PATH, 'val', 'NonFight'), nonfight_dir),
    ]

    for source_dir, target_dir in copy_plan:
        for file_name in os.listdir(source_dir):
            shutil.copy2(
                os.path.join(source_dir, file_name),
                os.path.join(target_dir, file_name),
            )

    print(f"Combined dataset created at: {combined_path}")
    return combined_path

In [None]:
def extract_frames(video_path, num_frames=20):  # Reduced from 30 to 20 frames
    """Sample ``num_frames`` evenly spaced frames as a compact float16 stack.

    Each decoded frame is converted from OpenCV's BGR channel order to RGB
    (the order ImageNet-pretrained backbones are trained on), resized to
    160x160, scaled to [0, 1] and stored as float16 to save memory.

    Args:
        video_path: path of the video file to read.
        num_frames: number of frames to sample across the whole clip.

    Returns:
        np.ndarray of shape (num_frames, 160, 160, 3) and dtype float16, or
        None when the file cannot be opened, reports no frames, or fewer than
        ``num_frames`` frames could be decoded. Partial clips are rejected
        because stacking clips of different lengths into one batch array fails.
    """
    cap = cv2.VideoCapture(video_path)

    if not cap.isOpened():
        print(f"Error opening video file: {video_path}")
        return None

    frames = []
    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # A non-positive count would make linspace produce negative indices.
        if total_frames > 0:
            indices = np.linspace(0, total_frames - 1, num_frames, dtype=int)
            for idx in indices:
                cap.set(cv2.CAP_PROP_POS_FRAMES, int(idx))
                ret, frame = cap.read()
                if ret:
                    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    # Reduce image size from 224x224 to 160x160
                    frame = cv2.resize(frame, (160, 160))
                    # Store as float16 to halve memory relative to float32
                    frame = (frame / 255.0).astype(np.float16)
                    frames.append(frame)
    finally:
        # Release the capture even if decoding raised.
        cap.release()

    if not frames or len(frames) != num_frames:
        print(f"Warning: Could not extract {num_frames} frames from {video_path}")
        return None

    return np.array(frames)

def prepare_dataset(combined_path, num_frames=20, batch_size=32):
    """Extract frames from every video and write them to disk in batches.

    Videos in ``Fight`` get label 1 and videos in ``NonFight`` get label 0;
    Fight videos are processed first. Each group of ``batch_size`` successfully
    decoded videos is saved as ``X_batch_<k>.npy`` / ``y_batch_<k>.npy`` under
    ``OUTPUT_PATH``, with ``k`` starting at 1; a final partial batch is saved
    with the next number.

    A batch is written only at the moment it becomes full. The previous
    ``count % batch_size == 0`` test was also true before the first success and
    after any failed video that followed a flush, which saved an empty array as
    batch 0 or over the batch just written.

    Args:
        combined_path: directory containing the ``Fight`` and ``NonFight`` folders.
        num_frames: number of frames requested from ``extract_frames`` per video.
        batch_size: number of videos per saved batch file (must be >= 1).

    Returns:
        tuple: (number of videos saved, number of the last batch written;
        0 when nothing was saved).

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    def _save_batch(samples, labels, number):
        # Write one batch pair to OUTPUT_PATH.
        np.save(os.path.join(OUTPUT_PATH, f'X_batch_{number}.npy'), np.array(samples))
        np.save(os.path.join(OUTPUT_PATH, f'y_batch_{number}.npy'), np.array(labels))

    X = []
    y = []
    count = 0
    batch_number = 0

    # (class folder, label, banner); banners keep their original text.
    class_specs = (
        ('Fight', 1, "Processing Fight videos..."),
        ('NonFight', 0, "\nProcessing NonFight videos..."),
    )

    for class_name, label, banner in class_specs:
        class_path = os.path.join(combined_path, class_name)
        print(banner)

        for i, video in enumerate(os.listdir(class_path)):
            frames = extract_frames(os.path.join(class_path, video), num_frames)
            if frames is not None:
                X.append(frames)
                y.append(label)
                count += 1

                # Flush only when this append filled the batch.
                if len(X) == batch_size:
                    batch_number += 1
                    _save_batch(X, y, batch_number)
                    X = []
                    y = []

            if (i + 1) % 100 == 0:
                print(f"Processed {i + 1} {class_name} videos")

    # Save any remaining samples
    if X:
        batch_number += 1
        _save_batch(X, y, batch_number)

    return count, batch_number


In [None]:
class DataGenerator(tf.keras.utils.Sequence):
    """Keras Sequence that serves pre-saved ``.npy`` batch files.

    Item ``idx`` is the pair loaded from ``X_batch_<n>.npy`` and
    ``y_batch_<n>.npy`` in ``output_path``, where ``n = batch_numbers[idx]``.
    """

    def __init__(self, batch_numbers, batch_size, output_path, **kwargs):
        """Store the batch file numbers to serve and where to find them.

        Args:
            batch_numbers: batch file numbers this generator iterates over.
            batch_size: stored for reference; the files define the real batch size.
            output_path: directory containing the batch files.
            **kwargs: forwarded to the Keras base class (for example
                ``workers``), which newer Keras versions expect subclasses to
                pass through ``super().__init__``.
        """
        super().__init__(**kwargs)
        self.batch_numbers = batch_numbers
        self.batch_size = batch_size
        self.output_path = output_path

    def __len__(self):
        """Return the number of batch files served per epoch."""
        return len(self.batch_numbers)

    def __getitem__(self, idx):
        """Load and return the ``(X, y)`` arrays of the ``idx``-th batch file."""
        batch_number = self.batch_numbers[idx]
        X = np.load(os.path.join(self.output_path, f'X_batch_{batch_number}.npy'))
        y = np.load(os.path.join(self.output_path, f'y_batch_{batch_number}.npy'))
        return X, y

def create_model(num_frames=20):
    """Build the reduced-size (uncompiled) frame-sequence classifier.

    A frozen ImageNet MobileNetV2 without its top processes each 160x160
    frame, the per-frame feature maps are average-pooled, and the sequence is
    passed through LSTM(128), LSTM(64), Dense(32) and a single sigmoid unit.

    Args:
        num_frames: length of the frame sequence the model takes as input.

    Returns:
        The Sequential model; the caller compiles it.
    """
    height, width, channels = 160, 160, 3  # Reduced input size

    feature_extractor = MobileNetV2(
        input_shape=(height, width, channels),
        include_top=False,
        weights='imagenet',
    )
    # Backbone weights are not updated during training.
    feature_extractor.trainable = False

    per_frame_features = TimeDistributed(
        feature_extractor, input_shape=(num_frames, height, width, channels)
    )
    per_frame_pooling = TimeDistributed(GlobalAveragePooling2D())

    return Sequential([
        per_frame_features,
        per_frame_pooling,
        LSTM(128, return_sequences=True),
        LSTM(64),
        Dense(32, activation='relu'),
        Dense(1, activation='sigmoid'),
    ])
    
def train_model():
    """Prepare on-disk batches, then train and evaluate the model from them.

    Batch file numbers are shuffled and divided 70/10/20 into train,
    validation and test generators. Saves the best checkpoint, the final
    model and the training history under ``OUTPUT_PATH``.

    Returns:
        tuple: (trained model, Keras History object).
    """
    num_frames, batch_size, epochs = 20, 32, 50

    # First combine the datasets
    print("Combining datasets...")
    combined_path = combine_datasets()

    # Extract frames and write them to disk batch by batch.
    print("Preparing dataset...")
    _sample_count, last_batch = prepare_dataset(combined_path, num_frames, batch_size)

    # Shuffle the batch numbers, then cut them at 70% and 80%.
    batch_ids = list(range(1, last_batch + 1))
    np.random.shuffle(batch_ids)

    n_ids = len(batch_ids)
    train_end, val_end = int(0.7 * n_ids), int(0.8 * n_ids)

    train_generator = DataGenerator(batch_ids[:train_end], batch_size, OUTPUT_PATH)
    val_generator = DataGenerator(batch_ids[train_end:val_end], batch_size, OUTPUT_PATH)
    test_generator = DataGenerator(batch_ids[val_end:], batch_size, OUTPUT_PATH)

    print("\nCreating and compiling model...")
    model = create_model(num_frames)
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    callbacks = [
        # Keep only the weights with the best validation accuracy.
        tf.keras.callbacks.ModelCheckpoint(
            filepath=os.path.join(OUTPUT_PATH, 'best_model.keras'),
            monitor='val_accuracy',
            save_best_only=True,
            mode='max',
        ),
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss', patience=5, restore_best_weights=True
        ),
        # Shrink the learning rate when validation loss stops improving.
        tf.keras.callbacks.ReduceLROnPlateau(
            monitor='val_loss', factor=0.2, patience=3, min_lr=1e-6
        ),
    ]

    print("\nStarting model training...")
    history = model.fit(
        train_generator,
        validation_data=val_generator,
        epochs=epochs,
        callbacks=callbacks,
    )

    print("\nEvaluating model...")
    _test_loss, test_acc = model.evaluate(test_generator)
    print(f"\nTest accuracy: {test_acc*100:.2f}%")

    # history.history is a dict, so this file is stored as a pickled object array.
    np.save(os.path.join(OUTPUT_PATH, 'training_history.npy'), history.history)

    model.save(os.path.join(OUTPUT_PATH, 'final_model.keras'))

    return model, history

def model2(num_batches=20):
    """Train and evaluate from batch files that already exist on disk.

    Same training procedure as ``train_model`` but without re-extracting
    frames: it expects ``X_batch_<k>.npy`` / ``y_batch_<k>.npy`` for
    ``k = 1..num_batches`` under ``OUTPUT_PATH``.

    Args:
        num_batches: how many batch files (numbered from 1) to use. Previously
            hard-coded to 20, which is still the default.
            NOTE(review): ``prepare_dataset`` in this notebook writes the Fight
            videos before the NonFight ones, so a small ``num_batches`` may
            cover a single class only; confirm the value against the number of
            batch files actually written.

    Returns:
        tuple: (trained model, Keras History object).

    Raises:
        FileNotFoundError: if any of the expected batch files is missing.
    """
    num_frames = 20
    batch_size = 32
    epochs = 50

    batch_numbers = list(range(1, num_batches + 1))

    # Fail before building the model rather than in the middle of an epoch.
    missing = [
        n for n in batch_numbers
        if not (
            os.path.exists(os.path.join(OUTPUT_PATH, f'X_batch_{n}.npy'))
            and os.path.exists(os.path.join(OUTPUT_PATH, f'y_batch_{n}.npy'))
        )
    ]
    if missing:
        raise FileNotFoundError(
            f"Missing batch files under {OUTPUT_PATH} for batch numbers: {missing}"
        )

    # Shuffle the batch numbers, then cut them 70/10/20 into train/val/test.
    np.random.shuffle(batch_numbers)

    train_idx = int(0.7 * len(batch_numbers))
    val_idx = int(0.8 * len(batch_numbers))

    train_batches = batch_numbers[:train_idx]
    val_batches = batch_numbers[train_idx:val_idx]
    test_batches = batch_numbers[val_idx:]
    train_generator = DataGenerator(train_batches, batch_size, OUTPUT_PATH)
    val_generator = DataGenerator(val_batches, batch_size, OUTPUT_PATH)
    test_generator = DataGenerator(test_batches, batch_size, OUTPUT_PATH)

    # Create and train model
    print("\nCreating and compiling model...")
    model = create_model(num_frames)
    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy']
    )

    # Keep only the weights with the best validation accuracy.
    checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=os.path.join(OUTPUT_PATH, 'best_model.keras'),
        monitor='val_accuracy',
        save_best_only=True,
        mode='max'
    )

    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=5,
        restore_best_weights=True
    )

    # Shrink the learning rate when validation loss stops improving.
    reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.2,
        patience=3,
        min_lr=1e-6
    )

    print("\nStarting model training...")
    history = model.fit(
        train_generator,
        validation_data=val_generator,
        epochs=epochs,
        callbacks=[
            checkpoint_callback,
            early_stopping,
            reduce_lr
        ]
    )

    # Evaluate model
    print("\nEvaluating model...")
    test_loss, test_accuracy = model.evaluate(test_generator)
    print(f"\nTest accuracy: {test_accuracy*100:.2f}%")

    # history.history is a dict, so this file is stored as a pickled object array.
    np.save(os.path.join(OUTPUT_PATH, 'training_history.npy'), history.history)

    # Save final model
    model.save(os.path.join(OUTPUT_PATH, 'final_model.keras'))

    return model, history

def evaluate_model(model, X_test, y_test):
    """Evaluate ``model`` on in-memory test arrays and save the diagnostics.

    Prints the test accuracy, then writes ``classification_report.txt`` and
    ``confusion_matrix.npy`` under ``OUTPUT_PATH`` and prints both.

    Args:
        model: compiled Keras model exposing ``evaluate`` and ``predict``.
        X_test: test inputs.
        y_test: test labels.
    """
    metrics = model.evaluate(X_test, y_test)
    loss, accuracy = metrics
    print(f"Test accuracy: {accuracy*100:.2f}%")

    # Binarise the sigmoid outputs at 0.5.
    hard_predictions = (model.predict(X_test) > 0.5).astype(int)

    from sklearn.metrics import classification_report, confusion_matrix

    def _artifact(file_name):
        # Location of an output file under OUTPUT_PATH.
        return os.path.join(OUTPUT_PATH, file_name)

    summary = classification_report(y_test, hard_predictions)
    with open(_artifact('classification_report.txt'), 'w') as out_file:
        out_file.write(summary)
    print("\nClassification Report:")
    print(summary)

    confusion = confusion_matrix(y_test, hard_predictions)
    np.save(_artifact('confusion_matrix.npy'), confusion)
    print("\nConfusion Matrix:")
    print(confusion)


In [None]:
# Entry point: run train_model() and keep the returned model and history as
# notebook-level variables.
if __name__ == "__main__":
    print("Starting violence detection model training...")
    model, history = train_model()
    print("\nTraining completed. All results saved to:", OUTPUT_PATH)

In [None]:
# Train and evaluate with model2()'s defaults, using the batch files already
# saved under OUTPUT_PATH; the returned (model, history) pair is not stored.
model2()

In [None]:
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import LSTM, Dense, TimeDistributed, GlobalAveragePooling2D
from tensorflow.keras.models import Sequential
import shutil
from sklearn.model_selection import train_test_split

# Define paths
# INPUT_PATH: dataset root; prepare_split_datasets() reads its train/ and val/ sub-folders.
# OUTPUT_PATH: directory receiving the train/val/test copies, the checkpoint and the history.
INPUT_PATH = '/kaggle/input/rwf2000/RWF-2000'
OUTPUT_PATH = '/kaggle/working'

def prepare_split_datasets(seed=42):
    """
    Prepare train, validation, and test datasets with specific splits:
    Train: 600 fight + 600 non-fight
    Val: 200 fight + 200 non-fight
    Test: 200 fight + 200 non-fight

    The source train/ and val/ folders of each class are pooled, shuffled and
    sliced, and the selected videos are copied to
    ``<OUTPUT_PATH>/<split>/<label>``.

    The directory listing is sorted and the shuffle is seeded, so running the
    function again selects the same videos for each split. With the previous
    unseeded shuffle, a re-run copied a different selection into the same
    folders, which could place one video in more than one split.

    Args:
        seed: seed for the shuffle; pass None for a non-reproducible split.

    Returns:
        bool: always True.
    """
    labels = ('Fight', 'NonFight')
    # Slice boundaries into each class's shuffled video list.
    split_bounds = {'train': (0, 600), 'val': (600, 800), 'test': (800, 1000)}

    # Create directories
    for split in split_bounds:
        for label in labels:
            os.makedirs(os.path.join(OUTPUT_PATH, split, label), exist_ok=True)

    rng = np.random.default_rng(seed)

    videos_by_label = {}
    for label in labels:
        video_paths = []
        for source_split in ('train', 'val'):
            source_dir = os.path.join(INPUT_PATH, source_split, label)
            # os.listdir order is arbitrary; sort so the seed fixes the result.
            video_paths.extend(
                os.path.join(source_dir, f) for f in sorted(os.listdir(source_dir))
            )
        rng.shuffle(video_paths)
        if len(video_paths) < 1000:
            print(f"Warning: only {len(video_paths)} {label} videos found; "
                  "splits will be smaller than documented")
        videos_by_label[label] = video_paths

    # Copy videos to respective directories
    for split_name, (start, stop) in split_bounds.items():
        print(f"\nProcessing {split_name} split:")

        for label in labels:
            selected = videos_by_label[label][start:stop]
            for video_path in selected:
                video_name = os.path.basename(video_path)
                dest_path = os.path.join(OUTPUT_PATH, split_name, label, video_name)
                shutil.copy2(video_path, dest_path)
            print(f"{label} videos: {len(selected)}")

    return True

def extract_frames(video_path, num_frames=30):
    """Sample ``num_frames`` evenly spaced frames from a video file.

    Each decoded frame is converted from OpenCV's BGR channel order to RGB
    (the order ImageNet-pretrained backbones are trained on), resized to
    224x224 and scaled to [0, 1] as float32.

    Args:
        video_path: path of the video file to read.
        num_frames: number of frames to sample across the whole clip.

    Returns:
        np.ndarray of shape (num_frames, 224, 224, 3) and dtype float32, or
        None when the file cannot be opened, reports no frames, or fewer than
        ``num_frames`` frames could be decoded. Partial clips are rejected
        because stacking clips of different lengths into one batch array fails.
    """
    cap = cv2.VideoCapture(video_path)

    if not cap.isOpened():
        print(f"Error opening video file: {video_path}")
        return None

    frames = []
    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # A non-positive count would make linspace produce negative indices.
        if total_frames > 0:
            indices = np.linspace(0, total_frames - 1, num_frames, dtype=int)
            for idx in indices:
                cap.set(cv2.CAP_PROP_POS_FRAMES, int(idx))
                ret, frame = cap.read()
                if ret:
                    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    frame = cv2.resize(frame, (224, 224))  # Using full resolution
                    frames.append(frame.astype(np.float32) / 255.0)
    finally:
        # Release the capture even if decoding raised.
        cap.release()

    if not frames or len(frames) != num_frames:
        print(f"Warning: Could not extract {num_frames} frames from {video_path}")
        return None

    return np.array(frames, dtype=np.float32)

class VideoDataGenerator(tf.keras.utils.Sequence):
    """Keras Sequence that decodes videos into frame batches on demand.

    Expects ``base_path`` to contain ``Fight`` (label 1) and ``NonFight``
    (label 0) folders. The video list is shuffled once at construction.
    """

    def __init__(self, base_path, batch_size=16, **kwargs):
        """Collect and shuffle the video paths and labels under ``base_path``.

        Args:
            base_path: directory containing the ``Fight`` and ``NonFight`` folders.
            batch_size: number of videos per batch.
            **kwargs: forwarded to the Keras base class (for example
                ``workers``), which newer Keras versions expect subclasses to
                pass through ``super().__init__``.
        """
        super().__init__(**kwargs)
        self.base_path = base_path
        self.batch_size = batch_size

        # Get all video paths
        self.fight_videos = [os.path.join(base_path, 'Fight', f)
                           for f in os.listdir(os.path.join(base_path, 'Fight'))]
        self.nonfight_videos = [os.path.join(base_path, 'NonFight', f)
                              for f in os.listdir(os.path.join(base_path, 'NonFight'))]

        self.video_paths = self.fight_videos + self.nonfight_videos
        self.labels = [1] * len(self.fight_videos) + [0] * len(self.nonfight_videos)

        # Shuffle paths and labels with the same permutation so they stay aligned.
        p = np.random.permutation(len(self.video_paths))
        self.video_paths = np.array(self.video_paths)[p]
        self.labels = np.array(self.labels)[p]

    def __len__(self):
        """Return the number of full batches; a trailing partial batch is dropped."""
        return len(self.video_paths) // self.batch_size

    def __getitem__(self, idx):
        """Decode and return batch ``idx`` as ``(X, y)``.

        Videos for which ``extract_frames`` returns None are left out, so a
        batch can hold fewer than ``batch_size`` samples.
        """
        batch_videos = self.video_paths[idx*self.batch_size:(idx+1)*self.batch_size]
        batch_labels = self.labels[idx*self.batch_size:(idx+1)*self.batch_size]

        X = []
        y = []

        for video_path, label in zip(batch_videos, batch_labels):
            frames = extract_frames(video_path)
            if frames is not None:
                X.append(frames)
                y.append(label)

        # Build the batch as float32 rather than float64 to halve its size.
        return np.array(X, dtype=np.float32), np.array(y)

def create_model(num_frames=30):
    """Build the full-resolution (uncompiled) frame-sequence classifier.

    Per frame: frozen ImageNet MobileNetV2 (no top) followed by global average
    pooling. Over the sequence: LSTM(256), LSTM(128), Dense(64, relu) and a
    single sigmoid output unit.

    Args:
        num_frames: length of the frame sequence the model takes as input.

    Returns:
        The Sequential model; the caller compiles it.
    """
    image_shape = (224, 224, 3)
    sequence_shape = (num_frames, *image_shape)

    cnn = MobileNetV2(input_shape=image_shape, include_top=False, weights='imagenet')
    # Backbone weights are not updated during training.
    cnn.trainable = False

    # Layers that run independently on every frame.
    frame_encoder = [
        TimeDistributed(cnn, input_shape=sequence_shape),
        TimeDistributed(GlobalAveragePooling2D()),
    ]
    # Layers that consume the resulting feature sequence.
    sequence_head = [
        LSTM(256, return_sequences=True),
        LSTM(128),
        Dense(64, activation='relu'),
        Dense(1, activation='sigmoid'),
    ]

    return Sequential(frame_encoder + sequence_head)

# Main training function
def train_model():
    """Create the on-disk splits, then train and evaluate with generators.

    Videos are decoded on the fly by ``VideoDataGenerator``. Saves the best
    checkpoint (by validation accuracy) and the training history under
    ``OUTPUT_PATH``.

    Returns:
        tuple: (trained model, Keras History object).
    """
    print("Preparing datasets...")
    prepare_split_datasets()

    # One generator per split folder, created in train/val/test order.
    generators = {
        split: VideoDataGenerator(os.path.join(OUTPUT_PATH, split), batch_size=16)
        for split in ('train', 'val', 'test')
    }

    print("\nCreating and compiling model...")
    model = create_model()
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # Keep only the weights with the best validation accuracy.
    best_checkpoint = tf.keras.callbacks.ModelCheckpoint(
        filepath=os.path.join(OUTPUT_PATH, 'best_model.keras'),
        monitor='val_accuracy',
        save_best_only=True,
        mode='max',
    )
    stop_early = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss', patience=7, restore_best_weights=True
    )
    # Shrink the learning rate when validation loss stops improving.
    lr_schedule = tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss', factor=0.2, patience=3, min_lr=1e-6
    )

    print("\nTraining model...")
    history = model.fit(
        generators['train'],
        validation_data=generators['val'],
        epochs=50,
        callbacks=[best_checkpoint, stop_early, lr_schedule],
    )

    print("\nEvaluating on test set...")
    _test_loss, test_acc = model.evaluate(generators['test'])
    print(f"\nTest accuracy: {test_acc*100:.2f}%")

    # history.history is a dict, so this file is stored as a pickled object array.
    np.save(os.path.join(OUTPUT_PATH, 'training_history.npy'), history.history)

    return model, history

# Run training
# Entry point: run train_model() and keep the returned model and history as
# notebook-level variables.
if __name__ == "__main__":
    print("Starting violence detection model training...")
    model, history = train_model()
    print("\nTraining completed. All results saved to:", OUTPUT_PATH)
