In [4]:
#!python -m spacy download en_core_web_md

In [5]:
##pip install typing_extensions==4.6.0


In [6]:
#pip install psycopg2-binary sqlalchemy pgvector  pandas numpy scikit-learn


In [2]:
pip install --upgrade SimpleAI_Image


Note: you may need to restart the kernel to use updated packages.


In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sqlalchemy import create_engine, Table, Column, Integer, MetaData, text
from pgvector.sqlalchemy import Vector
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.decomposition import PCA
import ast
import time

# Timing function
def timeit(method):
    """Decorator that prints how long each call to ``method`` takes.

    Args:
        method: The callable to wrap.

    Returns:
        A wrapper with the same signature and metadata as ``method`` that
        forwards all arguments, prints ``"<name> took <seconds> seconds"``
        after a successful call, and returns the wrapped call's result.
        If ``method`` raises, the exception propagates and nothing is printed.
    """
    import functools  # local import keeps this definition self-contained

    @functools.wraps(method)  # preserve __name__/__doc__ of the wrapped function
    def timed(*args, **kwargs):
        # perf_counter is monotonic, so the measured duration cannot be skewed
        # by system clock adjustments the way time.time() can.
        start_time = time.perf_counter()
        result = method(*args, **kwargs)
        elapsed = time.perf_counter() - start_time
        print(f"{method.__name__} took {elapsed:.2f} seconds")
        return result
    return timed

# Function to fetch data from PostgreSQL
def fetch_data_from_db(db_url=None):
    """Load every row of the ``vector_data`` table into a DataFrame.

    Args:
        db_url: SQLAlchemy connection URL. When omitted, the value of the
            ``DATABASE_URL`` environment variable is used, so credentials
            never have to be written into the notebook.

    Returns:
        pandas.DataFrame with the result of ``SELECT * FROM vector_data``.

    Raises:
        RuntimeError: If no connection URL was passed and ``DATABASE_URL``
            is unset or empty.
    """
    import os  # local import keeps this definition self-contained

    if db_url is None:
        db_url = os.environ.get("DATABASE_URL")
    if not db_url:
        raise RuntimeError(
            "Database URL not configured: pass db_url or set the DATABASE_URL "
            "environment variable "
            "(e.g. postgresql+psycopg2://user:password@localhost:5432/dbname)"
        )

    print("Fetching data from PostgreSQL...")
    engine = create_engine(db_url)
    query = text("SELECT * FROM vector_data")
    try:
        with engine.connect() as conn:
            df = pd.read_sql(query, conn)
    finally:
        # Release the engine's connection pool; the engine is only used once.
        engine.dispose()
    return df

@timeit
def main():
    """Visualize the stored embeddings and train a dense classifier on them.

    Pipeline:
        1. Fetch all rows via ``fetch_data_from_db``.
        2. Parse the ``features`` column into a 2-D float array and the
           ``label`` column into integers.
        3. Show a 3-D PCA scatter plot of the standardized features.
        4. Split into train/test, standardize and PCA-reduce using statistics
           fitted on the training split only.
        5. Train a small dense network with early stopping and learning-rate
           reduction, print a classification report on the test split, and
           plot the training history.

    Raises:
        ValueError: If the table returned no rows.
    """
    df = fetch_data_from_db()
    if df.empty:
        raise ValueError("vector_data returned no rows; nothing to train on")

    # Assuming the table has columns: 'id', 'features' (as pgvector), 'label'.
    # Without a registered pgvector adapter the vectors arrive in their text
    # form ("[0.1, 0.2, ...]"); values that are already sequences are accepted
    # as-is so the function works in both situations.
    def parse_vector(value):
        if isinstance(value, str):
            value = ast.literal_eval(value)
        return np.asarray(value, dtype=float)

    X_raw = np.stack([parse_vector(value) for value in df['features']])
    y = df['label'].astype(int).to_numpy()

    # --- Visualization -----------------------------------------------------
    # Scaling on the full dataset is fine here: this copy is used only for
    # the plot, never for training or evaluation.
    print("Standardizing features...")
    X_scaled_all = StandardScaler().fit_transform(X_raw)

    print("Reducing dimensionality to 3D for visualization...")
    X_pca_3d = PCA(n_components=3).fit_transform(X_scaled_all)

    plot_df = pd.DataFrame(X_pca_3d, columns=['PCA1', 'PCA2', 'PCA3'])
    plot_df['label'] = y

    fig = px.scatter_3d(plot_df, x='PCA1', y='PCA2', z='PCA3', color='label',
                        title='3D PCA of MNIST Vectors',
                        labels={'label': 'Digit'},
                        color_continuous_scale=px.colors.sequential.Viridis)

    fig.update_traces(marker=dict(size=3), selector=dict(mode='markers'))
    fig.show()

    # --- Model data ----------------------------------------------------------
    # Derive the class count once and use it for both the one-hot encoding and
    # the output layer, so the two can never disagree.
    num_classes = int(y.max()) + 1
    y_onehot = to_categorical(y, num_classes=num_classes)

    # Split BEFORE fitting the scaler/PCA so no test-set statistics leak into
    # the training pipeline.
    X_train_raw, X_test_raw, y_train, y_test = train_test_split(
        X_raw, y_onehot, test_size=0.2, random_state=42
    )

    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train_raw)
    X_test = scaler.transform(X_test_raw)

    print("Reducing dimensionality for model training...")
    # PCA cannot keep more components than min(n_samples, n_features).
    n_components = min(50, X_train.shape[0], X_train.shape[1])
    pca = PCA(n_components=n_components)
    X_train_pca = pca.fit_transform(X_train)
    X_test_pca = pca.transform(X_test)

    def build_model(input_shape, n_outputs):
        """Build and compile the dense softmax classifier."""
        model = Sequential([
            Flatten(input_shape=input_shape),
            Dense(128, activation='relu'),
            Dropout(0.2),
            Dense(64, activation='relu'),
            Dropout(0.2),
            Dense(n_outputs, activation='softmax')
        ])
        model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
        return model

    model = build_model(X_train_pca.shape[1:], num_classes)

    # Callbacks. min_lr must be BELOW the optimizer's starting learning rate
    # (Adam defaults to 0.001); with min_lr=0.001 the rate could never drop
    # and ReduceLROnPlateau would be a no-op.
    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-5)

    history = model.fit(
        X_train_pca, y_train,
        epochs=100,  # upper bound; early stopping normally ends training sooner
        validation_split=0.2,
        batch_size=32,
        callbacks=[early_stopping, reduce_lr],
        verbose=2
    )

    # --- Evaluation ------------------------------------------------------------
    y_pred = model.predict(X_test_pca)
    y_pred_classes = np.argmax(y_pred, axis=1)
    y_true_classes = np.argmax(y_test, axis=1)

    print("\nNeural Network Model Performance After Additional Training:")
    print(classification_report(y_true_classes, y_pred_classes))

    # --- Training history ------------------------------------------------------
    _, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(12, 4))

    ax_acc.plot(history.history['accuracy'], label='Training Accuracy')
    ax_acc.plot(history.history['val_accuracy'], label='Validation Accuracy')
    ax_acc.set_title('Accuracy')
    ax_acc.set_xlabel('Epoch')
    ax_acc.set_ylabel('Accuracy')
    ax_acc.legend()

    ax_loss.plot(history.history['loss'], label='Training Loss')
    ax_loss.plot(history.history['val_loss'], label='Validation Loss')
    ax_loss.set_title('Loss')
    ax_loss.set_xlabel('Epoch')
    ax_loss.set_ylabel('Loss')
    ax_loss.legend()

    plt.show()

# Run the whole pipeline: fetch the vectors, show the 3D PCA plot, then train
# and evaluate the classifier. main is wrapped with @timeit, so its total
# duration is printed once it returns.
main()


: 

In [1]:
from ImageAI.DatabaseHandler import DatabaseHandler
from ImageAI.DataProcessor import DataProcessor

2024-05-19 22:47:25.658622: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
from sklearn.datasets import fetch_openml

In [3]:
from tensorflow.keras.applications.vgg16 import preprocess_input as vgg_preprocess_input


In [8]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml
from tensorflow.keras.applications.vgg16 import preprocess_input as vgg_preprocess_input
from sqlalchemy import text


# Define the database URL and table name.
# The connection URL (which contains the user name and password) is read from
# the environment so that credentials are never stored in the notebook.
import os

db_url = os.environ.get("DATABASE_URL")
if not db_url:
    raise RuntimeError(
        "Set the DATABASE_URL environment variable "
        "(e.g. postgresql+psycopg2://user:password@localhost:5432/dbname)"
    )

# NOTE(review): the third argument is presumably the vector dimension (the
# run below reports features of shape (500, 512)) - confirm against
# DatabaseHandler.
db_handler = DatabaseHandler(db_url, 'vector_data', 512)

# Instantiate DataProcessor with VGG16 model
data_processor = DataProcessor(db_handler, model_name='VGG16', preprocess_func=vgg_preprocess_input, image_size=(32, 32))

# Load the example dataset (MNIST)
mnist = fetch_openml('mnist_784', version=1)
X = mnist.data[:500]  # Limit to 500 instances for testing
y = mnist.target[:500].astype(int)  # Ensure targets are integers

# Process data and store in database.
# The returned values are replaced two statements below by the data read back
# from the database.
X_embedded, y = data_processor.process_data(X, y)

# Fetch and preprocess data for visualization
query = text("SELECT * FROM vector_data")
X_embedded, y = data_processor.fetch_and_preprocess_data(query)

# Visualize data
data_processor.visualize_data(X_embedded, y)


  warn(


Reshaping and preprocessing images...
Loading pre-trained model and extracting features...
 1/16 [>.............................] - ETA: 3s

2024-05-19 22:53:12.056322: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Extracted features shape: (500, 512)
Storing vectors in PostgreSQL...
process_data took 1.39 seconds
Standardizing features...
Reducing dimensionality to 3D for visualization...
