# Patients Table

In [None]:
-- Patients: one row per registered patient (identity, contact details, diagnosis).
-- Only patient_id is constrained; every other column accepts NULL.
CREATE TABLE Patients (
    patient_id SERIAL PRIMARY KEY,  -- auto-generated key; referenced by Clinical_Trials.patient_id
    first_name VARCHAR(50),
    last_name VARCHAR(50),
    date_of_birth DATE,
    gender VARCHAR(10),
    diagnosis TEXT,  -- free text, no length limit
    address VARCHAR(100),
    phone_number VARCHAR(20),
    email VARCHAR(100),
    date_of_registration TIMESTAMP DEFAULT CURRENT_TIMESTAMP  -- filled in at insert time when omitted
);


# Researchers Table

In [None]:
-- Researchers: one row per member of research staff.
-- Only researcher_id is constrained; every other column accepts NULL.
CREATE TABLE Researchers (
    researcher_id SERIAL PRIMARY KEY,  -- auto-generated key; referenced by Clinical_Trials.researcher_id
    first_name VARCHAR(50),
    last_name VARCHAR(50),
    role VARCHAR(50),
    department VARCHAR(50),
    email VARCHAR(100),
    phone_number VARCHAR(20)
);


# Clinical Trials Table

In [None]:
-- Clinical_Trials: links a trial to one researcher and one patient.
-- Researchers and Patients must already exist when this statement runs,
-- because both foreign keys reference them.
-- NOTE(review): each row holds a single patient_id, so a trial with several
-- patients needs one row per patient (or a separate join table) -- confirm
-- the intended cardinality.
CREATE TABLE Clinical_Trials (
    trial_id SERIAL PRIMARY KEY,  -- auto-generated key
    trial_name VARCHAR(100),
    start_date DATE,
    end_date DATE,  -- nullable; no check against start_date is declared
    researcher_id INT REFERENCES Researchers(researcher_id),  -- FK, nullable
    patient_id INT REFERENCES Patients(patient_id),  -- FK, nullable
    status VARCHAR(20),  -- allowed values are not constrained here
    findings TEXT
);


# Environments Table

In [None]:
-- Environments: one row per database environment
-- (e.g., test, development, QA, production).
-- Referenced by Access_Control.environment_id.
CREATE TABLE Environments (
    environment_id SERIAL PRIMARY KEY,  -- auto-generated key
    environment_name VARCHAR(50),
    status VARCHAR(20),  -- allowed values are not constrained here
    last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP  -- set at insert time only; updates must set it explicitly
);


# User Accounts Table

In [None]:
-- User_Accounts: user account information and database roles.
-- Referenced by Access_Control.user_id and Audit_Log.user_id.

CREATE TABLE User_Accounts (
    user_id SERIAL PRIMARY KEY,  -- auto-generated key
    username VARCHAR(50) UNIQUE,  -- unique, but still nullable
    password_hash TEXT,  -- stores the hash only, never the plain-text password
    email VARCHAR(100),
    role VARCHAR(20),  -- e.g., 'Admin', 'Researcher', 'DBA'
    account_created TIMESTAMP DEFAULT CURRENT_TIMESTAMP,  -- filled in at insert time when omitted
    last_login TIMESTAMP  -- NULL until the application records a login
);


# Access Control Table

In [None]:
-- Access_Control: which users can access which environments, and with what
-- privileges. User_Accounts and Environments must already exist when this
-- statement runs, because both foreign keys reference them.

CREATE TABLE Access_Control (
    access_id SERIAL PRIMARY KEY,  -- auto-generated key
    user_id INT REFERENCES User_Accounts(user_id),  -- FK, nullable
    environment_id INT REFERENCES Environments(environment_id),  -- FK, nullable
    access_level VARCHAR(20),  -- e.g., 'Read', 'Write', 'Admin'
    granted_on TIMESTAMP DEFAULT CURRENT_TIMESTAMP  -- filled in at insert time when omitted
);


# Audit Log Table

In [None]:
-- Audit_Log: one row per recorded user action.
-- User_Accounts must already exist when this statement runs.
CREATE TABLE Audit_Log (
    log_id SERIAL PRIMARY KEY,  -- auto-generated key
    user_id INT REFERENCES User_Accounts(user_id),  -- FK, nullable
    action VARCHAR(100),
    action_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,  -- filled in at insert time when omitted
    details TEXT  -- free text, no length limit
);


# Password Hashing (bcrypt)

In [None]:
# Passwords are hashed with bcrypt before they are stored in the
# User_Accounts table; only the hash is ever persisted.

import bcrypt

# Example plain-text password. bcrypt works on bytes, so encode it once
# and reuse the encoded value for both hashing and verification.
password = "my_secure_password"
password_bytes = password.encode('utf-8')

# Generate a salt and hash the password with it.
salt = bcrypt.gensalt()
hashed_password = bcrypt.hashpw(password_bytes, salt)

# Verify the password against the stored hash (evaluates to a bool;
# checkpw needs only the password and the hash, not the salt).
bcrypt.checkpw(password_bytes, hashed_password)


# ETL Pipeline Script (Python)

In [None]:
import os

import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.engine import URL

# Database connection settings (local PostgreSQL)
DATABASE_TYPE = 'postgresql'
DBAPI = 'psycopg2'
ENDPOINT = 'localhost'  # Use cloud endpoint if deploying to cloud
USER = 'postgres'
# Read the password from the PGPASSWORD environment variable so the real
# credential is never saved with the notebook; the placeholder is only a
# fallback for local experiments.
PASSWORD = os.environ.get('PGPASSWORD', 'your_password')
PORT = 5432
DATABASE = 'airways_research_center'

# Connection to PostgreSQL database.
# The URL is assembled from its components (SQLAlchemy 1.4+) rather than an
# f-string, so credentials containing characters such as '@', ':' or '/' are
# escaped correctly instead of corrupting the connection string.
connection_url = URL.create(
    drivername=f"{DATABASE_TYPE}+{DBAPI}",
    username=USER,
    password=PASSWORD,
    host=ENDPOINT,
    port=PORT,
    database=DATABASE,
)
engine = create_engine(connection_url)

# Extract: Read CSV files
patients_df = pd.read_csv('patients.csv')
clinical_trials_df = pd.read_csv('clinical_trials.csv')

# Transform: Clean and process the data
# (Assume some cleaning if necessary, e.g., removing nulls, fixing formats)

# Load: Insert data into PostgreSQL tables.
# Both inserts share one transaction: engine.begin() commits when the block
# finishes and rolls back if either insert raises, so a failure cannot leave
# a half-loaded database. Patients are loaded first because
# Clinical_Trials.patient_id references Patients.patient_id.
with engine.begin() as connection:
    patients_df.to_sql('patients', connection, if_exists='append', index=False)
    clinical_trials_df.to_sql('clinical_trials', connection, if_exists='append', index=False)

print("Data successfully loaded into PostgreSQL database.")


# Setting Up PostgreSQL Database Locally and on Cloud

In [None]:
psql -U postgres
-- The remaining lines are entered at the psql prompt opened by the command
-- above: create the project database, then switch the session to it.
CREATE DATABASE airways_research_center;
\connect airways_research_center;


# Cloud deployment: use the hosted database host instead of 'localhost'.
# NOTE(review): ENDPOINT is only read when the connection engine is built,
# so this assignment must run before that step -- confirm cell order.
ENDPOINT = 'cancercenter-rds-endpoint.amazonaws.com'  # Cloud endpoint

# Performance Tuning for PostgreSQL

In [None]:
-- Indexing: add indexes to columns frequently used in WHERE clauses and joins
-- to improve query performance.
-- Patients(patient_id) is the PRIMARY KEY and is therefore already backed by a
-- unique index, so no second index is created on it. Foreign-key columns are
-- not indexed automatically, so both Clinical_Trials references get one.
CREATE INDEX idx_patient_id ON Clinical_Trials(patient_id);
CREATE INDEX idx_researcher_id ON Clinical_Trials(researcher_id);
-- Supports equality filters on diagnosis, such as the query analysed below.
CREATE INDEX idx_patients_diagnosis ON Patients(diagnosis);

-- Query optimization: use EXPLAIN to analyze and optimize slow queries.
-- EXPLAIN ANALYZE actually executes the statement and reports real timings.
EXPLAIN ANALYZE SELECT * FROM Patients WHERE diagnosis = 'Asthma';



# NoSQL Database Setup (MongoDB)

In [None]:
# Loading data into MongoDB

import os

from pymongo import MongoClient
import pandas as pd

# Connect to MongoDB.
# The local server is the default. When deploying to MongoDB Atlas
# (cloud-based MongoDB), set the MONGODB_URI environment variable instead, e.g.
#   mongodb+srv://<username>:<password>@cluster0.mongodb.net/airways_research_center
# That URI is kept as documentation only: <username> and <password> are
# placeholders, so building a client from it verbatim cannot connect.
client = MongoClient(os.environ.get("MONGODB_URI", "mongodb://localhost:27017/"))
db = client.airways_research_center

# Read CSV data
patients_df = pd.read_csv('patients.csv')

# Insert records into MongoDB collection.
# insert_many() rejects an empty document list, so the call is skipped when
# the CSV contains a header but no data rows.
patient_records = patients_df.to_dict('records')
if patient_records:
    db.patients.insert_many(patient_records)
    print("Data successfully loaded into MongoDB.")
else:
    print("No patient records found in patients.csv; nothing was loaded into MongoDB.")



# MongoDB Schema for Patients Collection

In [None]:
{
    "patient_id": 1,
    "first_name": "David",
    "last_name": "Jones",
    "date_of_birth": "1980-04-15",
    "gender": "Male",
    "diagnosis": "Asthma",
    "address": "123 Maple St, Winston-Salem, NC",
    "phone_number": "(555)-123-4567",
    "email": "david.jones@example.com",
    "date_of_registration": "2024-01-12 10:15:00"
}


# CI/CD for Database Deployment

In [None]:
// Declarative Jenkins pipeline for database deployment:
// run the ETL test suite, then apply database changes.
// A failing 'Test' stage stops the pipeline before 'Deploy' runs.
pipeline {
    agent any

    stages {
        stage('Test') {
            steps {
                echo 'Running tests...'
                sh 'pytest tests/'  // Run Python tests for ETL
            }
        }

        stage('Deploy') {
            steps {
                echo 'Deploying to PostgreSQL database...'
                sh 'python deploy.py'  // Script to deploy database changes
            }
        }
    }

    // Report the overall outcome once all stages have finished.
    post {
        success {
            echo 'Deployment successful!'
        }
        failure {
            echo 'Deployment failed.'
        }
    }
}
