In [246]:
import pandas as pd
import numpy as np
import sqlite3

# Transforming the query result into dataframe for easier manipulations.
def query_to_df(query):
    with sqlite3.connect("sql-murder-mystery.db") as connection:
        results_df = pd.read_sql(query, connection)
    return results_df

# Retrieving the table schema
def get_schema(table_name):
    with sqlite3.connect("sql-murder-mystery.db") as connection:
        cursor = connection.cursor()
        cursor.execute(f"PRAGMA table_info('{table_name}')")
        data = cursor.fetchall()
    columns = [desc[0] for desc in cursor.description] # retrieve column names
    df = pd.DataFrame(data, columns=columns) # create pandas DataFrame
    return df

# Checking what tables are in the database.
Firstly, I'm checking what tables are in the database file. <br> For that I'm gonna run a query on 'sqlite_master' table to receive information about the structure of a database.

In [247]:
database_tables = query_to_df("SELECT name FROM sqlite_master WHERE type = 'table'")
print(database_tables)

                     name
0      crime_scene_report
1         drivers_license
2  facebook_event_checkin
3               interview
4      get_fit_now_member
5    get_fit_now_check_in
6                solution
7                  income
8                  person


# Schemas
Knowing what tables I'm working with, I'd check now what kind of data/information is stored inside each table.

### *Crime Scene Report*

In [248]:
crime_scene_report_schema = get_schema("crime_scene_report")
print(crime_scene_report_schema)

   cid         name     type  notnull dflt_value  pk
0    0         date  INTEGER        0       None   0
1    1         type     TEXT        0       None   0
2    2  description     TEXT        0       None   0
3    3         city     TEXT        0       None   0


### *Drivers License*

In [249]:
drivers_license_schema = get_schema("drivers_license")
print(drivers_license_schema)

   cid          name     type  notnull dflt_value  pk
0    0            id  INTEGER        0       None   1
1    1           age  INTEGER        0       None   0
2    2        height  INTEGER        0       None   0
3    3     eye_color     TEXT        0       None   0
4    4    hair_color     TEXT        0       None   0
5    5        gender     TEXT        0       None   0
6    6  plate_number     TEXT        0       None   0
7    7      car_make     TEXT        0       None   0
8    8     car_model     TEXT        0       None   0


### *Facebook Event Checkin*

In [250]:
facebook_event_checkin_schema = get_schema("facebook_event_checkin")
print(facebook_event_checkin_schema)

   cid        name     type  notnull dflt_value  pk
0    0   person_id  INTEGER        0       None   0
1    1    event_id  INTEGER        0       None   0
2    2  event_name     TEXT        0       None   0
3    3        date  INTEGER        0       None   0


### *Interview*

In [251]:
interview_schema = get_schema("interview")
print(interview_schema)

   cid        name     type  notnull dflt_value  pk
0    0   person_id  INTEGER        0       None   0
1    1  transcript     TEXT        0       None   0


### *Get Fit Now Member*

In [252]:
get_fit_now_member_schema = get_schema("get_fit_now_member")
print(get_fit_now_member_schema)

   cid                   name     type  notnull dflt_value  pk
0    0                     id     TEXT        0       None   1
1    1              person_id  INTEGER        0       None   0
2    2                   name     TEXT        0       None   0
3    3  membership_start_date  INTEGER        0       None   0
4    4      membership_status     TEXT        0       None   0


### *Get Fit Now Check In*

In [253]:
get_fit_now_check_in_schema = get_schema("get_fit_now_check_in")
print(get_fit_now_check_in_schema)

   cid            name     type  notnull dflt_value  pk
0    0   membership_id     TEXT        0       None   0
1    1   check_in_date  INTEGER        0       None   0
2    2   check_in_time  INTEGER        0       None   0
3    3  check_out_time  INTEGER        0       None   0


### *Solution*

In [254]:
solution_schema = get_schema("solution_schema")
print(solution_schema)

Empty DataFrame
Columns: [cid, name, type, notnull, dflt_value, pk]
Index: []


### *Income*

In [255]:
income_schema = get_schema("income")
print(income_schema)

   cid           name     type  notnull dflt_value  pk
0    0            ssn     CHAR        0       None   1
1    1  annual_income  INTEGER        0       None   0


### *Person*

In [256]:
person_schema = get_schema("person")
print(person_schema)

   cid                 name     type  notnull dflt_value  pk
0    0                   id  INTEGER        0       None   1
1    1                 name     TEXT        0       None   0
2    2           license_id  INTEGER        0       None   0
3    3       address_number  INTEGER        0       None   0
4    4  address_street_name     TEXT        0       None   0
5    5                  ssn     CHAR        0       None   0


## First Clue:
A crime has taken place and the detective needs your help. The detective gave you the crime scene report, but you somehow lost it.
<br> You vaguely remember that the crime was a ​murder​ that occurred sometime on ​Jan.15, 2018​ and that it took place in ​SQLCity​.

In [257]:
first_clue_results = (query_to_df("""
                                    SELECT description
                                    FROM crime_scene_report
                                    WHERE date = 20180115 AND type = 'murder' AND city = 'SQL City'
                                  """))
with pd.option_context('display.max_colwidth', None):
    print(first_clue_results)

                                                                                                                                                                                 description
0  Security footage shows that there were 2 witnesses. The first witness lives at the last house on "Northwestern Dr". The second witness, named Annabel, lives somewhere on "Franklin Ave".


The results from the first clue:
- 1st. witness lives at the last house on "Northwestern Dr".
- 2nd. witness, named Annabel, lives somewhere on "Franklin Ave".
We can try to find more information about them in 'Person' table.

In [258]:
x = (query_to_df("SELECT * FROM person"))
print(x)

          id                 name  license_id  address_number  \
0      10000   Christoper Peteuil      993845             624   
1      10007  Kourtney Calderwood      861794            2791   
2      10010            Muoi Cary      385336             741   
3      10016          Era Moselle      431897            1987   
4      10025         Trena Hornby      550890             276   
...      ...                  ...         ...             ...   
10006  99936          Luba Benser      274427             680   
10007  99941      Roxana Mckimley      975942            1613   
10008  99965      Cherie Zeimantz      287627            3661   
10009  99982          Allen Cruse      251350            3126   
10010  99990         Vance Hunten      830407            3056   

      address_street_name        ssn  
0            Bankhall Ave  747714076  
1           Gustavus Blvd  477972044  
2         Northwestern Dr  828638512  
3           Wood Glade St  614621061  
4           Daws Hill Wa

In [259]:
second_clue_results = (query_to_df("""
                                    SELECT * 
                                    FROM person 
                                    WHERE address_street_name = 'Northwestern Dr' AND address_number = (SELECT MAX(address_number) FROM person WHERE address_street_name = 'Northwestern Dr')

                                    UNION

                                    SELECT * FROM person WHERE name LIKE '%Annabel%' AND address_street_name = 'Franklin Ave'
                                   """))
print(second_clue_results)

      id            name  license_id  address_number address_street_name  \
0  14887  Morty Schapiro      118009            4919     Northwestern Dr   
1  16371  Annabel Miller      490173             103        Franklin Ave   

         ssn  
0  111564949  
1  318771143  


In [None]:
third_clue_results = (query_to_df("""
                                    SELECT id, transcript
                                    FROM interview, person
                                    WHERE person.id = interview.person_id
                                
                                
                                
                                
                                  """))