# In [None]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import IsolationForest
from sklearn.feature_extraction.text import TfidfVectorizer
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
from statsmodels.tsa.arima.model import ARIMA
import joblib

# Load the dataset
def load_data(file_path):
    """
    Read the private access logs from a CSV file.

    Parameters:
        file_path (str): Location of the CSV file; passed unchanged to
            ``pd.read_csv``.

    Returns:
        pd.DataFrame: The parsed contents of the file, using pandas'
            default CSV parsing options.
    """
    access_logs = pd.read_csv(file_path)
    return access_logs

# Preprocess data
def preprocess_data(df):
    """
    Clean the access logs and derive the feature sets used for modelling.

    Rows containing any missing value are dropped, ``file_size`` and
    ``access_duration`` are standardized, ``file_type`` is converted into
    TF-IDF and Doc2Vec representations, and ``file_size`` is summed per hour.
    All work happens on a copy; the DataFrame passed in is not modified.

    Parameters:
        df (pd.DataFrame): Raw data. Must contain the columns ``file_size``,
            ``access_duration``, ``file_type`` and ``timestamp``.

    Returns:
        tuple: ``(df, text_features, doc2vec_embeddings, time_series_data)``
            - df (pd.DataFrame): Cleaned copy with the two numeric columns
              standardized.
            - text_features: Sparse TF-IDF matrix built from ``file_type``.
            - doc2vec_embeddings (list): One inferred 50-dimensional vector
              per remaining row.
            - time_series_data (pd.DataFrame): Hourly sums of the
              standardized ``file_size``, indexed by timestamp.

    Raises:
        KeyError: If a required column is missing.
        ValueError: If no rows remain after dropping missing values.
    """
    required_columns = ['file_size', 'access_duration', 'file_type', 'timestamp']
    missing = [col for col in required_columns if col not in df.columns]
    if missing:
        raise KeyError(f"Missing required columns: {missing}")

    # Copy instead of dropna(inplace=True) so the caller's frame is untouched.
    df = df.dropna().copy()
    if df.empty:
        raise ValueError("No rows left after dropping missing values.")

    # Normalize numerical features
    numeric_columns = ['file_size', 'access_duration']
    scaler = StandardScaler()
    df[numeric_columns] = scaler.fit_transform(df[numeric_columns])

    # Cast to str so non-string file types do not break the vectorizer or split().
    file_types = df['file_type'].astype(str)

    # TF-IDF Vectorization for textual features
    vectorizer = TfidfVectorizer()
    text_features = vectorizer.fit_transform(file_types)

    # Doc2Vec for additional embeddings; tokenize once and reuse the tokens
    # for both training and inference.
    tokenized = [file_type.split() for file_type in file_types]
    tagged_data = [
        TaggedDocument(words=tokens, tags=[i])
        for i, tokens in enumerate(tokenized)
    ]
    doc2vec_model = Doc2Vec(tagged_data, vector_size=50, window=2, min_count=1, workers=4)
    doc2vec_embeddings = [doc2vec_model.infer_vector(tokens) for tokens in tokenized]

    # Time series extraction. resample() needs a DatetimeIndex, and timestamps
    # read from CSV are plain strings, so parse them first. The returned df
    # keeps its original timestamp column unchanged.
    # NOTE(review): this sums the *standardized* file_size, as the original
    # code did; confirm whether raw sizes were intended for the series.
    hourly_source = df[['timestamp', 'file_size']].assign(
        timestamp=pd.to_datetime(df['timestamp'])
    )
    # 'h' is the current hourly alias; upper-case 'H' is deprecated in pandas.
    time_series_data = hourly_source.set_index('timestamp').resample('h').sum()

    return df, text_features, doc2vec_embeddings, time_series_data

# Train Isolation Forest for anomaly detection
def train_isolation_forest(features, *, contamination=0.01, random_state=42):
    """
    Train an Isolation Forest model for anomaly detection.

    Parameters:
        features: Feature set to train the model; passed directly to
            ``IsolationForest.fit``.
        contamination (float): Expected proportion of anomalies in the data.
            Defaults to 0.01, the value previously hard-coded here.
        random_state (int): Seed for the estimator so repeated runs build the
            same forest. Defaults to 42.

    Returns:
        IsolationForest: Trained model.
    """
    model = IsolationForest(contamination=contamination, random_state=random_state)
    model.fit(features)
    return model

# Train ARIMA for time series anomaly detection
def train_arima(time_series_data, order=(5, 1, 0)):
    """
    Fit an ARIMA model for time series anomaly detection.

    Parameters:
        time_series_data (pd.DataFrame): Time series data used as the
            endogenous variable of the model.
        order (tuple): The ``(p, d, q)`` order of the ARIMA model. Defaults
            to ``(5, 1, 0)``, the value previously hard-coded here.

    Returns:
        The fitted results object returned by ``ARIMA.fit()`` (not the
        unfitted ``ARIMA`` model itself).
    """
    model = ARIMA(time_series_data, order=order)
    model_fit = model.fit()
    return model_fit

# Deploy model using Kubeflow (placeholder function)
def deploy_model(model, model_name):
    """
    Persist a trained model to disk and report it as deployed.

    This is a placeholder: the model is only serialized with joblib to
    ``<model_name>.pkl`` in the current working directory and a status line
    is printed. No Kubeflow API is called here.

    Parameters:
        model: Trained model to serialize.
        model_name (str): Base name of the output file and the label used in
            the printed message.
    """
    artifact_path = f'{model_name}.pkl'
    joblib.dump(model, artifact_path)

    status_message = f"{model_name} model deployed using Kubeflow."
    print(status_message)

# Example execution
# Guarded so that importing this module does not read the dataset, train
# models, or write model files as a side effect. When run as a script the
# names below are still module-level globals.
if __name__ == "__main__":
    file_path = 'private_access_logs.csv'  # Update with actual dataset

    # Load and preprocess data
    df = load_data(file_path)
    df, text_features, doc2vec_embeddings, time_series_data = preprocess_data(df)

    # Train models
    # Only the TF-IDF features feed the Isolation Forest; the Doc2Vec
    # embeddings are computed but not used by any model here.
    iso_forest_model = train_isolation_forest(text_features.toarray())
    arima_model = train_arima(time_series_data)

    # Deploy models
    deploy_model(iso_forest_model, 'isolation_forest')
    deploy_model(arima_model, 'arima_model')
