# 3. Load Data
## Ensure Environment is Configured
**Go [here](01_import_data.ipynb) if you are following along and have not configured the virtual environment and installed dependencies.**

In [1]:
# Jupyter magic
%run ../util/dependencies.py
%load_ext sql

## Load raw sdr csv

In [3]:
# sdr_raw query
query_create_table_sdr_raw = """
CREATE TABLE IF NOT EXISTS sdr_raw (
  OperatorControlNumber        text,
  DifficultyDate               text,
  SubmissionDate               text,
  OperatorDesignator           text,
  SubmitterDesignator          text,
  SubmitterTypeCode            text,
  ReceivingRegionCode          text,
  ReceivingDistrictOffice      text,
  SDRType                      text,
  JASCCode                     text,
  NatureOfConditionA           text,
  NatureOfConditionB           text,
  NatureOfConditionC           text,
  PrecautionaryProcedureA      text,
  PrecautionaryProcedureB      text,
  PrecautionaryProcedureC      text,
  PrecautionaryProcedureD      text,
  StageOfOperationCode         text,
  HowDiscoveredCode            text,
  RegistryNNumber              text,
  AircraftMake                 text,
  AircraftModel                text,
  AircraftSerialNumber         text,
  AircraftTotalTime            text,
  AircraftTotalCycles          text,
  EngineMake                   text,
  EngineModel                  text,
  EngineSerialNumber           text,
  EngineTotalTime              text,
  EngineTotalCycles            text,
  PropellerMake                text,
  PropellerModel               text,
  PropellerSerialNumber        text,
  PropellerTotalTime           text,
  PropellerTotalCycles         text,
  PartMake                     text,
  PartName                     text,
  PartNumber                   text,
  PartSerialNumber             text,
  PartCondition                text,
  PartLocation                 text,
  PartTotalTime                text,
  PartTotalCycles              text,
  PartTimeSince                text,
  PartSinceCode                text,
  ComponentMake                text,
  ComponentModel               text,
  ComponentName                text,
  ComponentPartNumber          text,
  ComponentSerialNumber        text,
  ComponentLocation            text,
  ComponentTotalTime           text,
  ComponentTotalCycles         text,
  ComponentTimeSince           text,
  ComponentSinceCode           text,
  FuselageStationFrom          text,
  FuselageStationTo            text,
  StringerFrom                 text,
  StringerFromSide             text,
  StringerTo                   text,
  StringerToSide               text,
  WingStationFrom              text,
  WingStationFromSide          text,
  WingStationTo                text,
  WingStationToSide            text,
  ButtLineFrom                 text,
  ButtLineFromSide             text,
  ButtLineTo                   text,
  ButtLineToSide               text,
  WaterLineFrom                text,
  WaterLineTo                  text,
  CrackLength                  text,
  NumberOfCracks               text,
  CorrosionLevel               text,
  StructuralOther              text,
  Discrepancy                  text
);
"""

# 'vectra' database connection
db_connection = psycopg2.connect(dbname='vectra',
                                 host='localhost',
                                 user='robb',
                                 password='thek3yisK#')
cur = db_connection.cursor()
cur.execute(query_create_table_sdr_raw)

# copy contents to sdr_raw table
with open('../data/02_csv/SDR_COMPOSITE_EXPORT.csv', 'r') as f:
    cur.copy_expert("""
        COPY sdr_raw
        FROM STDIN
        WITH CSV HEADER
    """, f)

# commit and close the connection
db_connection.commit()
cur.close()
db_connection.close()

Check sdr_raw

In [None]:
-- Lets test first to ensure that sdr data was properly transfered over to the 'sdr_raw' table . . .
SELECT *
FROM sdr_raw
LIMIT 10;

-- Ensure csv size . . .
SELECT count(*)
FROM sdr_raw;

-- Alright, this checks out!!

## Populate Datatables
### Master Tables
Lets start normalizing the database, by transferring contents of sdr_raw into our other data tables (aircraft, ata_chapter, sdr_event, &)

In [None]:
-- ==========================================================================
-- Populating the Master Tables (aircraft, ata_chapter, jasc_code & operator)
-- ==========================================================================

-- ata_chapter
INSERT INTO ata_chapter(ata_chapter)
SELECT DISTINCT (LEFT(JASCCode, 2))::int
FROM sdr_raw
WHERE JASCCode IS NOT NULL
ON CONFLICT (ata_chapter) DO NOTHING;

-- jasc_code
INSERT INTO jasc_code(jasc_code, ata_chapter)
SELECT DISTINCT JASCcode, (LEFT(JASCCode, 2))::int
FROM sdr_raw
WHERE JASCCode IS NOT NULL
ON CONFLICT (jasc_code) DO NOTHING;

-- operator
INSERT INTO operator (designator)
SELECT DISTINCT OperatorDesignator
FROM sdr_raw
WHERE OperatorDesignator IS NOT NULL
ON CONFLICT (designator) DO NOTHING;

-- aircraft
INSERT INTO aircraft (
    registry_n_number,
    make,
    model,
    serial_number,
    total_time,
    total_cycles
)
SELECT
    RegistryNNumber,
    MAX(AircraftMake) AS make,
    MAX(AircraftModel) AS model,
    MAX(AircraftSerialNumber) AS serial_number,
    MAX(AircraftTotalTime::float) AS total_time,
    MAX(AircraftTotalCycles::float) AS total_cycles
FROM sdr_raw
WHERE RegistryNNumber IS NOT NULL
GROUP BY RegistryNNumber
ON CONFLICT (registry_n_number) DO UPDATE
SET
    make = EXCLUDED.make,
    model = EXCLUDED.model,
    serial_number = EXCLUDED.serial_number,
    total_time = GREATEST(aircraft.total_time, EXCLUDED.total_time),
    total_cycles = GREATEST(aircraft.total_cycles, EXCLUDED.total_cycles);

SyntaxError: invalid syntax (2348617581.py, line 1)

In [None]:
-- Peak view of random entity records
SELECT *
FROM ata_chapter
ORDER BY random()
LIMIT 10;

SELECT * 
FROM jasc_code
ORDER BY random()
LIMIT 10;

SELECT *
FROM operator
ORDER BY random()
LIMIT 10;

SELECT *
FROM aircraft
ORDER BY random()
LIMIT 10;

### SDR Event Table (Behemouth)

In [None]:
-- ==============================
-- Populating the sdr_event Table
-- ==============================
INSERT INTO sdr_event (
    operator_control_number,
    difficulty_date,
    submission_date,
    operator_designator_id,
    submitter_designator_id,
    submitter_type_code,
    receiving_region_code,
    receiving_district_office,
    sdr_type,
    nature_condition_a,
    nature_condition_b,
    nature_condition_c,
    precautionary_proc_a,
    precautionary_proc_b,
    precautionary_proc_c,
    precautionary_proc_d,
    stage_of_operation,
    how_discovered,
    discrepancy,

    -- Foreign keys
    aircraft_id,
    operator_id,
    jasc_code,

    -- ENGINE (event-specific)
    engine_make,
    engine_model,
    engine_serial_number,
    engine_total_time,
    engine_total_cycles,

    -- PROPELLER (event-specific)
    propeller_make,
    propeller_model,
    propeller_serial_number,
    propeller_total_time,
    propeller_total_cycles,

    -- PART (event-specific)
    part_make,
    part_name,
    part_number,
    part_serial_number,
    part_condition,
    part_location,
    part_total_time,
    part_total_cycles,
    part_time_since,
    part_since_code,

    -- COMPONENT (event-specific)
    component_make,
    component_model,
    component_name,
    component_part_number,
    component_serial_number,
    component_location,
    component_total_time,
    component_total_cycles,
    component_time_since,
    component_since_code,

    -- STRUCTURAL LOCATION (event-specific)
    fuselage_station_from,
    fuselage_station_to,
    stringer_from,
    stringer_from_side,
    stringer_to,
    stringer_to_side,
    wing_station_from,
    wing_station_from_side,
    wing_station_to,
    wing_station_to_side,
    butt_line_from,
    butt_line_from_side,
    butt_line_to,
    butt_line_to_side,
    water_line_from,
    water_line_to,
    crack_length,
    number_of_cracks,
    corrosion_level,
    structural_other
    )
SELECT 
    r.OperatorControlNumber,
    r.DifficultyDate::date,
    r.SubmissionDate::date,

    o_op.id, --this is supposed to not be the operator designator, but the id for that operator
    o_sub.id, --this too for submitter, its the id

    r.SubmitterTypeCode,
    r.ReceivingRegionCode,
    r.ReceivingDistrictOffice,
    r.SDRType,
    r.NatureOfConditionA,
    r.NatureOfConditionB,
    r.NatureOfConditionC,
    r.PrecautionaryProcedureA,
    r.PrecautionaryProcedureB,
    r.PrecautionaryProcedureC,
    r.PrecautionaryProcedureD,
    r.StageOfOperationCode,
    r.HowDiscoveredCode,
    r.Discrepancy,

    a.id AS aircraft_id,
    o_op.id AS operator_id,
    j.jasc_code, -- reference automatic?

    r.EngineMake,
    r.EngineModel,
    r.EngineSerialNumber,
    r.EngineTotalTime::float,
    r.EngineTotalCycles::float,

    r.PropellerMake,
    r.PropellerModel,
    r.PropellerSerialNumber,
    r.PropellerTotalTime::float,
    r.PropellerTotalCycles::float,

    r.PartMake,
    r.PartName,
    r.PartNumber,
    r.PartSerialNumber,
    r.PartCondition,
    r.PartLocation,
    r.PartTotalTime::float,
    r.PartTotalCycles::float,
    r.PartTimeSince::float,
    r.PartSinceCode,

    r.ComponentMake,
    r.ComponentModel,
    r.ComponentName,
    r.ComponentPartNumber,
    r.ComponentSerialNumber,
    r.ComponentLocation,
    r.ComponentTotalTime::float,
    r.ComponentTotalCycles::float,
    r.ComponentTimeSince::float,
    r.ComponentSinceCode,

    r.FuselageStationFrom,
    r.FuselageStationTo,
    r.StringerFrom,
    r.StringerFromSide,
    r.StringerTo,
    r.StringerToSide,
    r.WingStationFrom,
    r.WingStationFromSide,
    r.WingStationTo,
    r.WingStationToSide,
    r.ButtLineFrom,
    r.ButtLineFromSide,
    r.ButtLineTo,
    r.ButtLineToSide,
    r.WaterLineFrom,
    r.WaterLineTo,
    r.CrackLength,
    r.NumberOfCracks::float,
    r.CorrosionLevel,
    r.StructuralOther
FROM sdr_raw AS r
    LEFT JOIN aircraft AS a
        ON a.registry_n_number = r.RegistryNNumber
    LEFT JOIN operator AS o_op
        ON o_op.designator = r.OperatorDesignator
    LEFT JOIN operator AS o_sub
        ON o_sub.designator = r.SubmitterDesignator
    LEFT JOIN jasc_code AS j
        ON j.jasc_code = r.JASCCode
WHERE OperatorControlNumber IS NOT NULL
ON CONFLICT (operator_control_number) DO NOTHING;

In [None]:
-- Check SDR_Event
SELECT *
FROM sdr_event
ORDER BY random()
LIMIT 10;