# Using pandasql and SQLite to train SQL
# Utilizaçao do SQL através da biblioteca pandasql e SQLite

---
Research fonts: 
1.   Primary author: https://www.youtube.com/c/ritvikmath
1.   Secondary author: https://github.com/GuilhermeRuy97
1.   sqlite3 connection tutorial: http://pythonclub.com.br/gerenciando-banco-dados-sqlite3-python-parte1.html
1.   What is the cursor execute in Python: https://linuxhint.com/cursor-execute-python/
1.   pandasql.org: https://pypi.org/project/pandasql/
1.   sqlite.org: https://www.sqlite.org/docs.html
---

# Import

In [26]:
# Environment configuration

# $ pip install -U pandasql
!pip install pandasql

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [62]:
# Required libraries

import pandas as pd          # used to read the DataFrame
import numpy  as np          # used to create the DataFrame
import random                # used to generate a dataset
from random import sample

from pandasql import sqldf   # pandasql

import sqlite3               # SQLite

# Data

This dataframe was created from imaginary data of students in a certain classroom

In [28]:
data = { 'student' : ['João', 'Ribeiro', 'Evaldo', 'Isaias', 'Jessyca','Anne','Luiz'],    
         'age'     : ['20', '29', '22', '21', '35', '32', '40'],    
         'city'    : ['São Paulo', 'São João', 'Pinhal', 'Mococa', 'Poços', 'Botelhos', 'Pinhal'],
         'birthday': ['021004', '030220', np.nan, '000420', '871212', '000101', np.nan] }

df = pd.DataFrame(data)

In [29]:
# Data type correction

df['birthday'] = pd.to_datetime( df['birthday'], format = '%y%m%d' )
df['age'] = df['age'].astype(int)

In [30]:
df

Unnamed: 0,student,age,city,birthday
0,João,20,São Paulo,2002-10-04
1,Ribeiro,29,São João,2003-02-20
2,Evaldo,22,Pinhal,NaT
3,Isaias,21,Mococa,2000-04-20
4,Jessyca,35,Poços,1987-12-12
5,Anne,32,Botelhos,2000-01-01
6,Luiz,40,Pinhal,NaT


In [31]:
data_employees = { 'student': ['João', 'Ribeiro', 'Isaias','Anne','Luiz'],    
                   'job'    : ['Mechanic', 'Mechanic', 'Chemist', 'Professor', 'Nurse'] }

df2 = pd.DataFrame(data_employees)

In [32]:
df2

Unnamed: 0,student,job
0,João,Mechanic
1,Ribeiro,Mechanic
2,Isaias,Chemist
3,Anne,Professor
4,Luiz,Nurse


# SELECT

In [33]:
# Selecting all data

statement = "SELECT * FROM df LIMIT 5;"

selected_data = sqldf(statement, globals())

print(selected_data)

   student  age       city                    birthday
0     João   20  São Paulo  2002-10-04 00:00:00.000000
1  Ribeiro   29   São João  2003-02-20 00:00:00.000000
2   Evaldo   22     Pinhal                        None
3   Isaias   21     Mococa  2000-04-20 00:00:00.000000
4  Jessyca   35      Poços  1987-12-12 00:00:00.000000


In [34]:
# Selecting name and age

statement = "SELECT student, age FROM df LIMIT 5;"

selected_data = sqldf(statement, globals())

print(selected_data)

   student  age
0     João   20
1  Ribeiro   29
2   Evaldo   22
3   Isaias   21
4  Jessyca   35


Obs: The main function used in pandasql is sqldf. <br>sqldf accepts 2 parameters:
 - a sql query string 
 - an set of session/environment variables (locals() or globals())

# DISTINCT/ COUNT / AS

In [35]:
# Selecting distinct cities

statement = "SELECT DISTINCT city FROM df;"

selected_data = sqldf(statement, globals())

print(selected_data)

        city
0  São Paulo
1   São João
2     Pinhal
3     Mococa
4      Poços
5   Botelhos


In [36]:
# Counting the amount of distinct cities

statement = "SELECT COUNT(DISTINCT city) AS num_cities FROM df;"

selected_data = sqldf(statement, globals())

print(selected_data)

   num_cities
0           6


In [37]:
# Selecting distinct combinations of two cities

statement = "SELECT DISTINCT student, city FROM df;"

selected_data = sqldf(statement, globals())

print(selected_data)

   student       city
0     João  São Paulo
1  Ribeiro   São João
2   Evaldo     Pinhal
3   Isaias     Mococa
4  Jessyca      Poços
5     Anne   Botelhos
6     Luiz     Pinhal


# WHERE

In [38]:
# Selecting with a condition

statement = "SELECT * FROM df WHERE city = 'Pinhal';"

selected_data = sqldf(statement, globals())

print(selected_data)

  student  age    city birthday
0  Evaldo   22  Pinhal     None
1    Luiz   40  Pinhal     None


In [39]:
# Selecting the name of the students that lives in Pinhal

statement = "SELECT student FROM df WHERE city = 'Pinhal';"

selected_data = sqldf(statement, globals())

print(selected_data)

  student
0  Evaldo
1    Luiz


In [40]:
# Selecting the name of the students that lives in Pinhal or São Paulo

statement = "SELECT student FROM df WHERE city = 'Pinhal' OR city = 'São Paulo';"

selected_data = sqldf(statement, globals())

print(selected_data)

  student
0    João
1  Evaldo
2    Luiz


In [41]:
# Selecting the name of the students that do not lives in São Paulo and the age is greater than 25

statement = "SELECT student FROM df WHERE city != 'São Paulo' AND age > 25;"

selected_data = sqldf(statement, globals())

print(selected_data)

   student
0  Ribeiro
1  Jessyca
2     Anne
3     Luiz


In [42]:
# Selecting names ending with 'iz' or starting with 'Je'

statement = "SELECT student FROM df WHERE student LIKE '%iz' OR student LIKE 'Je%';"

selected_data = sqldf(statement, globals())

print(selected_data)

   student
0  Jessyca
1     Luiz


In [43]:
# Selecting the average age of citizens of Pinhal

statement = "SELECT AVG(age) AS mean_age FROM df WHERE city = 'Pinhal';"

selected_data = sqldf(statement, globals())

print(selected_data)

   mean_age
0      31.0


# ORDER BY

In [44]:
# Ordering the data by age ascending

statement = "SELECT age FROM df ORDER BY age;"

selected_data = sqldf(statement, globals())

print(selected_data)

   age
0   20
1   21
2   22
3   29
4   32
5   35
6   40


In [45]:
# Ordering the data by age descending

statement = "SELECT age FROM df ORDER BY age DESC;"

selected_data = sqldf(statement, globals())

print(selected_data)

   age
0   40
1   35
2   32
3   29
4   22
5   21
6   20


In [46]:
# Ordering the data by city ascending and then by age descending

statement = "SELECT city, age FROM df ORDER BY city, age DESC;"

selected_data = sqldf(statement, globals())

print(selected_data)

        city  age
0   Botelhos   32
1     Mococa   21
2     Pinhal   40
3     Pinhal   22
4      Poços   35
5   São João   29
6  São Paulo   20


# GROUP BY

In [47]:
# Grouping the data by city and getting the max age

statement = "SELECT city, MAX(age) FROM df GROUP BY city;"

selected_data = sqldf(statement, globals())

print(selected_data)

        city  MAX(age)
0   Botelhos        32
1     Mococa        21
2     Pinhal        40
3      Poços        35
4   São João        29
5  São Paulo        20


In [48]:
# Grouping the data by city and getting the max age, ordering by max(age) ascending

statement = "SELECT city, MAX(age) AS max_age FROM df GROUP BY city ORDER BY max_age;"

selected_data = sqldf(statement, globals())

print(selected_data)

        city  max_age
0  São Paulo       20
1     Mococa       21
2   São João       29
3   Botelhos       32
4      Poços       35
5     Pinhal       40


In [49]:
# Grouping the data by city and getting the max age, ordering by max(age) ascending with age > 30

statement = "SELECT city, MAX(age) AS max_age FROM df WHERE age > 30 GROUP BY city ORDER BY max_age;"

selected_data = sqldf(statement, globals())

print(selected_data)

       city  max_age
0  Botelhos       32
1     Poços       35
2    Pinhal       40


In [50]:
# Getting the average age grouped by city and returning the city initials

statement = "SELECT SUBSTR(city, 1, 3) AS initials, AVG(age) FROM df GROUP BY city;" # SUBSTR(cidade, 3) gets the last 3 characteres

selected_data = sqldf(statement, globals())

print(selected_data)

  initials  AVG(age)
0      Bot      32.0
1      Moc      21.0
2      Pin      31.0
3      Poç      35.0
4      São      29.0
5      São      20.0


# Having

What is the Difference between Where and Having Clause in SQL? If “Where” clause is used to filter the records from a table that is based on a specified condition, then the “Having” clause is used to filter the record from the groups based on the specified condition.

https://byjus.com/gate/difference-between-where-and-having-clause-in-sql/#:~:text=What%20is%20the%20Difference%20between,based%20on%20the%20specified%20condition.

In [51]:
# Grouping the data by city and getting the average age, ordering by max(age) ascending with mean_age > 20

statement = "SELECT city, AVG(age) AS max_age FROM df GROUP BY city HAVING max_age > 20 ORDER BY max_age;"

selected_data = sqldf(statement, globals())

print(selected_data)

       city  max_age
0    Mococa     21.0
1  São João     29.0
2    Pinhal     31.0
3  Botelhos     32.0
4     Poços     35.0


In [52]:
# Returning a column for the amount of citizens in each city

statement = " \
  SELECT city, SUM(CASE WHEN city = 'Pinhal' THEN 1 ELSE 0 END) AS citizens \
  FROM df \
  GROUP BY city; \
"

selected_data = sqldf(statement, globals())

print(selected_data)

# Extra: returning the percentage of students that lives in 'Pinhal

# statement = " \
#   SELECT aluno, SUM(CASE WHEN cidade = 'Pinhal' THEN 1.0 ELSE 0.0 END)/COUNT(*) AS frac_math \
#   FROM df; \
# "

        city  citizens
0   Botelhos         0
1     Mococa         0
2     Pinhal         2
3      Poços         0
4   São João         0
5  São Paulo         0


# JOINS

<img src = "https://miro.medium.com/max/1200/1*av8Om3HpG1MC7YTLKvyftg.png" width = 150%>

image-font = https://miro.medium.com/max/1200/1*av8Om3HpG1MC7YTLKvyftg.png

In [53]:
# Returning the age among working students

statement = " \
  SELECT S.student, W.job, age \
  FROM df S \
  INNER JOIN df2 W ON S.student = W.student; \
"

selected_data = sqldf(statement, globals())

print(selected_data)

   student        job  age
0     João   Mechanic   20
1  Ribeiro   Mechanic   29
2   Isaias    Chemist   21
3     Anne  Professor   32
4     Luiz      Nurse   40


In [55]:
# Returning the students, their age and the average age among their professions

statement = " \
  SELECT S.student, S.age, W.job, AVG(age) \
  FROM df S \
  INNER JOIN df2 W ON S.student = W.student \
  GROUP BY W.job \
  ORDER BY S.age; \
"

selected_data = sqldf(statement, globals())

print(selected_data)

   student  age        job  AVG(age)
0   Isaias   21    Chemist      21.0
1  Ribeiro   29   Mechanic      24.5
2     Anne   32  Professor      32.0
3     Luiz   40      Nurse      40.0


In [56]:
# Returning the students and their profession even if it's null

statement = " \
  SELECT S.student, W.job \
  FROM df S \
  LEFT JOIN df2 W ON S.student = W.student; \
"

selected_data = sqldf(statement, globals())

print(selected_data)

   student        job
0     João   Mechanic
1  Ribeiro   Mechanic
2   Evaldo       None
3   Isaias    Chemist
4  Jessyca       None
5     Anne  Professor
6     Luiz      Nurse


In [57]:
# Returning the students who don't work

statement = " \
  SELECT S.student \
  FROM df S \
  LEFT JOIN df2 W ON S.student = W.student \
  WHERE W.student IS NULL; \
"

selected_data = sqldf(statement, globals())

print(selected_data)

   student
0   Evaldo
1  Jessyca


In [58]:
# Getting the percentage of students who work among the cities

statement = " \
  SELECT S.city, SUM(CASE WHEN job IS NOT NULL THEN 1.0 ELSE 0.0 END)/COUNT(*) AS frac_working \
  FROM df S \
  LEFT JOIN df2 W ON S.student = W.student \
  GROUP BY city; \
"

selected_data = sqldf(statement, globals())

print(selected_data)

        city  frac_working
0   Botelhos           1.0
1     Mococa           1.0
2     Pinhal           0.5
3      Poços           0.0
4   São João           1.0
5  São Paulo           1.0


In [59]:
# Getting the percentage of students who work among the cities and returning the cities with 100% (cities where all the students works)

statement = " \
  SELECT S.city, SUM(CASE WHEN job IS NOT NULL THEN 1.0 ELSE 0.0 END)/COUNT(*) AS frac_working \
  FROM df S \
  LEFT JOIN df2 W ON S.student = W.student \
  GROUP BY city \
  HAVING frac_working = 1.0; \
"

selected_data = sqldf(statement, globals())

print(selected_data)

        city  frac_working
0   Botelhos           1.0
1     Mococa           1.0
2   São João           1.0
3  São Paulo           1.0


# SELF JOIN

In [60]:
# Returning the difference in age between two students

statement = " \
  SELECT S1.student AS student_1, S1.age AS age_student_1, \
   S2.student AS student_2, S2.age as age_student_2, \
   ABS(S1.age - S2.age) AS age_dif \
  FROM df S1 INNER JOIN df S2\
  WHERE S1.student != S2.student \
  LIMIT 10; \
"
# note that 'INNER JOIN' can be replaced by ','
# note how we use the LIMIT = 10 because of the many possible combinations (X*X-X combinations)

selected_data = sqldf(statement, globals())

print(selected_data)

  student_1  age_student_1 student_2  age_student_2  age_dif
0      João             20   Ribeiro             29        9
1      João             20    Evaldo             22        2
2      João             20    Isaias             21        1
3      João             20   Jessyca             35       15
4      João             20      Anne             32       12
5      João             20      Luiz             40       20
6   Ribeiro             29      João             20        9
7   Ribeiro             29    Evaldo             22        7
8   Ribeiro             29    Isaias             21        8
9   Ribeiro             29   Jessyca             35        6


In [61]:
# Taking the average of all the absolute age differences

statement = " \
  SELECT AVG( ABS(S1.age - S2.age) ) AS age_avg_absdiff \
  FROM df S1, df S2 \
  WHERE S1.student != S2.student; \
"

selected_data = sqldf(statement, globals())

print(selected_data)

   age_avg_absdiff
0         9.333333


# SQLITE

In [63]:
# Biblioteca necessária

import sqlite3

In [65]:
# Create a database connection
conn = sqlite3.connect('df2.db')

# Create a cursor
cur = conn.cursor()

In [66]:
# Fuction that displays the result as a table

def display_as_table(data, headers):
  table = pd.DataFrame(data = data, columns = [i[0] for i in headers])
  return table

**Method cursor.execute ()**:

The execute () method helps us to execute the query and return records according to the query. The syntax of the execute () function is:
```SQL
execute (query, args = None)
```
**Parameters** :

- **query**: This should be a string type.
- **Arguments**: By default, the arguments are **None** because sometimes we can pass only a query like a **SELECT** query which fetches the records and does not require any values. So that’s the reason for the **args=None** by default. But if we want to pass the values in the case of the **INSERT** query, then the type of the arguments must be a tuple, list, or dict only.

**Returns**:

- It will return the count of the numbers of rows affected during the query.

**Return Type**:

- The return type will be an integer (**int**).

| Syntax      | Description |
| ----------- | ----------- |
| fetchone()        | This will return the single row from the result, and if there is no record to return, then it will return as None.       |
| fetchmany([size])	| This will return the number of rows as size specified from the result sets, and if there is no record to return, then it will return []. The default size is 1.        |
|   fetchall()      |    Returns the all or remaining rows from the result set.       |

font: https://linuxhint.com/cursor-execute-python/

# CREATE TABLE

In [79]:
# Creating the Students table

cur.execute("CREATE TABLE IF NOT EXISTS Students(Name text, GPA float, Major text, Year integer)")

<sqlite3.Cursor at 0x7f3de74a6c00>

In [80]:
# Display Students table

cur.execute("SELECT * FROM Students")
display_as_table(cur.fetchall(), cur.description)

Unnamed: 0,Name,GPA,Major,Year


# INSERT INTO

In [81]:
# Inserting a student into the table

cur.execute("INSERT INTO Students VALUES ('Joao', 2.9, 'Math', 3)")

cur.execute("Select * FROM Students")

display_as_table(cur.fetchall(), cur.description)

Unnamed: 0,Name,GPA,Major,Year
0,Joao,2.9,Math,3


In [82]:
# Saving the modifications with SQLITE into the connection

conn.commit()

In [83]:
cur.execute('''INSERT INTO Students VALUES 
            ('Marcio', 2.9, 'Math', 3),
            ('Maria', 3.9, 'Math', 3),
            ('Carlos', 2.5, 'Math', 3),
            ('Antonio', 2.7, 'Math', 3),
            ('Joao Cleber', 3.5, 'Math', 3)''')  # note that you can use ''' instead of \ every line

cur.execute("Select * FROM Students")

display_as_table(cur.fetchall(), cur.description)

Unnamed: 0,Name,GPA,Major,Year
0,Joao,2.9,Math,3
1,Marcio,2.9,Math,3
2,Maria,3.9,Math,3
3,Carlos,2.5,Math,3
4,Antonio,2.7,Math,3
5,Joao Cleber,3.5,Math,3


In [84]:
conn.commit()

# you can also conn.rollback()

In [85]:
# Returning the results with GPA greater than 3

statement = " \
  SELECT Name, Major, GPA \
  FROM Students \
  WHERE GPA > 3.0; \
"

cur.execute(statement)
display_as_table(cur.fetchall(), cur.description)

Unnamed: 0,Name,Major,GPA
0,Maria,Math,3.9
1,Joao Cleber,Math,3.5


In [86]:
# Inserting into certain columns

statement = " \
  INSERT INTO Students(Name, Major, Year) VALUES ('Maul', 'Math', 1); "

cur.execute(statement)
cur.execute("SELECT * FROM Students")

display_as_table(cur.fetchall(), cur.description)

Unnamed: 0,Name,GPA,Major,Year
0,Joao,2.9,Math,3
1,Marcio,2.9,Math,3
2,Maria,3.9,Math,3
3,Carlos,2.5,Math,3
4,Antonio,2.7,Math,3
5,Joao Cleber,3.5,Math,3
6,Maul,,Math,1


In [87]:
# Getting students with null GPA

statement = " \
  SELECT Name \
  FROM Students \
  WHERE GPA IS NULL; "

cur.execute(statement)

display_as_table(cur.fetchall(), cur.description)

Unnamed: 0,Name
0,Maul


**INSERT records from another table**

In [88]:
# Creating a second table
cur.execute("CREATE TABLE IF NOT EXISTS NewStudents(Name text, GPA float, Major text, Year integer)") # we could use varchar(255) instead of text for example
cur.execute('''INSERT INTO NewStudents VALUES 
            ('Jessica', 2.0, 'Physics', 1),
            ('Tarcisio', 4.0, 'History', 2)''')

# Fetching the data from NewStudents to Students
cur.execute("INSERT INTO Students SELECT * FROM NewStudents")

cur.execute("SELECT * FROM Students")
display_as_table(cur.fetchall(), cur.description)

Unnamed: 0,Name,GPA,Major,Year
0,Joao,2.9,Math,3
1,Marcio,2.9,Math,3
2,Maria,3.9,Math,3
3,Carlos,2.5,Math,3
4,Antonio,2.7,Math,3
5,Joao Cleber,3.5,Math,3
6,Maul,,Math,1
7,Jessica,2.0,Physics,1
8,Tarcisio,4.0,History,2


# UPDATE

In [89]:
# Remember

cur.execute("SELECT * FROM Students")
display_as_table(cur.fetchall(), cur.description)

Unnamed: 0,Name,GPA,Major,Year
0,Joao,2.9,Math,3
1,Marcio,2.9,Math,3
2,Maria,3.9,Math,3
3,Carlos,2.5,Math,3
4,Antonio,2.7,Math,3
5,Joao Cleber,3.5,Math,3
6,Maul,,Math,1
7,Jessica,2.0,Physics,1
8,Tarcisio,4.0,History,2


In [90]:
# Exchanging the occurrences of 'Physics' to 'Geograph

cur.execute("UPDATE Students SET Major = 'Geograph' WHERE Major = 'Physics'")

cur.execute("SELECT * FROM Students")
display_as_table(cur.fetchall(), cur.description)

Unnamed: 0,Name,GPA,Major,Year
0,Joao,2.9,Math,3
1,Marcio,2.9,Math,3
2,Maria,3.9,Math,3
3,Carlos,2.5,Math,3
4,Antonio,2.7,Math,3
5,Joao Cleber,3.5,Math,3
6,Maul,,Math,1
7,Jessica,2.0,Geograph,1
8,Tarcisio,4.0,History,2


In [91]:
conn.commit()

**Be careful to put the WHERE condition! Otherwise you'll update the whole column**

In [92]:
# Updating without the WHERE condition

cur.execute("UPDATE Students SET Major = 'geograph'")

cur.execute("SELECT * FROM Students")
display_as_table(cur.fetchall(), cur.description)

Unnamed: 0,Name,GPA,Major,Year
0,Joao,2.9,geograph,3
1,Marcio,2.9,geograph,3
2,Maria,3.9,geograph,3
3,Carlos,2.5,geograph,3
4,Antonio,2.7,geograph,3
5,Joao Cleber,3.5,geograph,3
6,Maul,,geograph,1
7,Jessica,2.0,geograph,1
8,Tarcisio,4.0,geograph,2


In [93]:
conn.rollback() # Rolling back to fix the mistake

cur.execute("SELECT * FROM Students")
display_as_table(cur.fetchall(), cur.description)

Unnamed: 0,Name,GPA,Major,Year
0,Joao,2.9,Math,3
1,Marcio,2.9,Math,3
2,Maria,3.9,Math,3
3,Carlos,2.5,Math,3
4,Antonio,2.7,Math,3
5,Joao Cleber,3.5,Math,3
6,Maul,,Math,1
7,Jessica,2.0,Geograph,1
8,Tarcisio,4.0,History,2


In [94]:
# Also works with more conditions

cur.execute("UPDATE Students SET Major = 'Geograph' WHERE Major = 'Math' and Year = 1")

cur.execute("SELECT * FROM Students")
display_as_table(cur.fetchall(), cur.description)

Unnamed: 0,Name,GPA,Major,Year
0,Joao,2.9,Math,3
1,Marcio,2.9,Math,3
2,Maria,3.9,Math,3
3,Carlos,2.5,Math,3
4,Antonio,2.7,Math,3
5,Joao Cleber,3.5,Math,3
6,Maul,,Geograph,1
7,Jessica,2.0,Geograph,1
8,Tarcisio,4.0,History,2


In [95]:
conn.commit()

# ALTER TABLE

In [96]:
# Remember

cur.execute("SELECT * FROM Students")
display_as_table(cur.fetchall(), cur.description)

Unnamed: 0,Name,GPA,Major,Year
0,Joao,2.9,Math,3
1,Marcio,2.9,Math,3
2,Maria,3.9,Math,3
3,Carlos,2.5,Math,3
4,Antonio,2.7,Math,3
5,Joao Cleber,3.5,Math,3
6,Maul,,Geograph,1
7,Jessica,2.0,Geograph,1
8,Tarcisio,4.0,History,2


In [97]:
# Adding a new column
cur.execute("ALTER TABLE Students ADD COLUMN InSate bit(1)")  # bit(1) is a True or False type of data

cur.execute("SELECT * FROM Students")
display_as_table(cur.fetchall(), cur.description)

Unnamed: 0,Name,GPA,Major,Year,InSate
0,Joao,2.9,Math,3,
1,Marcio,2.9,Math,3,
2,Maria,3.9,Math,3,
3,Carlos,2.5,Math,3,
4,Antonio,2.7,Math,3,
5,Joao Cleber,3.5,Math,3,
6,Maul,,Geograph,1,
7,Jessica,2.0,Geograph,1,
8,Tarcisio,4.0,History,2,


In [98]:
# Renaming the column Students to StudentsData
cur.execute("ALTER TABLE Students RENAME to StudentsData")

cur.execute("SELECT * FROM StudentsData")
display_as_table(cur.fetchall(), cur.description)

Unnamed: 0,Name,GPA,Major,Year,InSate
0,Joao,2.9,Math,3,
1,Marcio,2.9,Math,3,
2,Maria,3.9,Math,3,
3,Carlos,2.5,Math,3,
4,Antonio,2.7,Math,3,
5,Joao Cleber,3.5,Math,3,
6,Maul,,Geograph,1,
7,Jessica,2.0,Geograph,1,
8,Tarcisio,4.0,History,2,


Trying to access the table Students that was renamed will let to:

OperationalError: no such table: Students

# DROP TABLE

In [101]:
# Drop the table NewStudents

cur.execute("DROP TABLE NewStudents")

<sqlite3.Cursor at 0x7f3de74a6c00>

Trying to access the NewStudents table will let to:

OperationalError: no such table: NewStudents

# SQL INJECTION

"SQL injection is a code injection technique that might destroy your database."

"SQL injection is one of the most common web hacking techniques."

In [106]:
# Creating a second table so we don't lose our Students

cur.execute("CREATE TABLE IF NOT EXISTS InjectionStudents(Name text, GPA float, Major text, Year integer)") # we could use varchar(255) instead of text for example
cur.execute("ALTER TABLE InjectionStudents ADD COLUMN InSate bit(1)") 
cur.execute("INSERT INTO InjectionStudents SELECT * FROM StudentsData")

cur.execute("SELECT * FROM InjectionStudents")
display_as_table(cur.fetchall(), cur.description)

Unnamed: 0,Name,GPA,Major,Year,InSate
0,Joao,2.9,Math,3,
1,Marcio,2.9,Math,3,
2,Maria,3.9,Math,3,
3,Carlos,2.5,Math,3,
4,Antonio,2.7,Math,3,
5,Joao Cleber,3.5,Math,3,
6,Maul,,Geograph,1,
7,Jessica,2.0,Geograph,1,
8,Tarcisio,4.0,History,2,


In [112]:
# In the system, there is a field to update the name and gpa of a student

def update_student_gpa(name, gpa):
  statement = "UPDATE InjectionStudents SET GPA=%s WHERE Name=%s" % (gpa, name)
  cur.executescript(statement)

In [113]:
# Ex. updating Maria GPA to 3.3

update_student_gpa('"Maria"', 3.3)

cur.execute("SELECT * FROM InjectionStudents")
display_as_table(cur.fetchall(), cur.description)

Unnamed: 0,Name,GPA,Major,Year,InSate
0,Joao,2.9,Math,3,
1,Marcio,2.9,Math,3,
2,Maria,3.3,Math,3,
3,Carlos,2.5,Math,3,
4,Antonio,2.7,Math,3,
5,Joao Cleber,3.5,Math,3,
6,Maul,,Geograph,1,
7,Jessica,2.0,Geograph,1,
8,Tarcisio,4.0,History,2,


In [114]:
# Using the SQL Injection to drop the table StudentsData
# WARNING: It will drop the whole table

update_student_gpa('"Maria"; DROP TABLE InjectionStudents;', 0)

Trying to access the table InjectionStudents will let to:

OperationalError: no such table: InjectionStudents

## Better (safer) way



This technique is called parameterized queries (querys with placeholders parameters and cast as a string instead of a command)

In [117]:
# In this case, we treat everything as STRING with placeholders

def update_student_gpa(name, gpa):
  statement = "UPDATE StudentsData SET GPA=? WHERE Name=?"  # ? is a placeholder here
  cur.executemany(statement, [(gpa, name)])

In [118]:
# Testing the function again

update_student_gpa("Antonio", 4.0)

cur.execute("SELECT * FROM StudentsData")
display_as_table(cur.fetchall(), cur.description)

Unnamed: 0,Name,GPA,Major,Year,InSate
0,Joao,2.9,Math,3,
1,Marcio,2.9,Math,3,
2,Maria,3.3,Math,3,
3,Carlos,2.5,Math,3,
4,Antonio,4.0,Math,3,
5,Joao Cleber,3.5,Math,3,
6,Maul,,Geograph,1,
7,Jessica,2.0,Geograph,1,
8,Tarcisio,4.0,History,2,


In [119]:
# Trying a sql injection again

update_student_gpa('"Maria"; DROP TABLE StudentsData;', 0)

In [120]:
cur.execute("SELECT * FROM StudentsData")
display_as_table(cur.fetchall(), cur.description) # Look how it stays intact

Unnamed: 0,Name,GPA,Major,Year,InSate
0,Joao,2.9,Math,3,
1,Marcio,2.9,Math,3,
2,Maria,3.3,Math,3,
3,Carlos,2.5,Math,3,
4,Antonio,4.0,Math,3,
5,Joao Cleber,3.5,Math,3,
6,Maul,,Geograph,1,
7,Jessica,2.0,Geograph,1,
8,Tarcisio,4.0,History,2,


# Primary key

It's a column or a collection of columns that satisfies 3 conditions and can be used to related one column to another:

1.   The column has to be unique
2.   No Null values on the column
3.   Each table has at most 1 primary key


In [123]:
# Create Students table with primary key

statement = '''
CREATE TABLE Students (
Name varchar(255) NOT NULL,
GPA float CHECK (GPA <= 4 AND GPA >= 0),
Major VARCHAR(255) NOT NULL,
Year integer DEFAULT 1,
CONSTRAINT name_pk PRIMARY KEY (Name)
);
'''

# Obs: When you set name to be a primary key, you don't need to declare it as UNIQUE as it is a condition for a primary key

cur.execute(statement)

<sqlite3.Cursor at 0x7f3de74a6c00>

In [124]:
# Inserting data into Students

cur.execute("INSERT INTO Students VALUES ('YODA', 3.5, 'Physics', 3)")
cur.execute("INSERT INTO Students VALUES ('Vader', 3.2, 'Math', 3)")
cur.execute("INSERT INTO Students VALUES ('Kylo', 3.9, 'Physics', 2)")

cur.execute("SELECT * FROM Students")
display_as_table(cur.fetchall(), cur.description) 

Unnamed: 0,Name,GPA,Major,Year
0,YODA,3.5,Physics,3
1,Vader,3.2,Math,3
2,Kylo,3.9,Physics,2


Trying to insert another student YODA in the column name (wich is a primary key) will let to: 

IntegrityError: UNIQUE constraint failed: Students.Name

Obs: 'YODA' != 'Yoda'

Trying to let the column Name (pk) to be null by inserting null values will let to:

IntegrityError: NOT NULL constraint failed: Students.Name

## Multi-Column Primary Key

We can use a combination of columns to be a PK instead of one specific

In [128]:
# Drop the table Students to create a new one

cur.execute("DROP TABLE Students")

<sqlite3.Cursor at 0x7f3de74a6c00>

In [129]:
# Create Students table with multi-column PK

statement = '''
CREATE TABLE Students (
Name varchar(255) NOT NULL,
GPA float CHECK (GPA <= 4 AND GPA >= 0),
Major varchar(255) NOT NULL,
Year integer DEFAULT 1,
CONSTRAINT name_major_year_pk PRIMARY KEY (Name, Major, Year)
);
'''

cur.execute(statement)

<sqlite3.Cursor at 0x7f3de74a6c00>

In [130]:
# Inserting some data

cur.execute("INSERT INTO Students VALUES ('YODA', 3.5, 'Physics', 3)")
cur.execute("INSERT INTO Students VALUES ('Vader', 3.2, 'Math', 3)")
cur.execute("INSERT INTO Students VALUES ('Kylo', 3.9, 'Physics', 2)")

cur.execute("SELECT * FROM Students")
display_as_table(cur.fetchall(), cur.description) 

Unnamed: 0,Name,GPA,Major,Year
0,YODA,3.5,Physics,3
1,Vader,3.2,Math,3
2,Kylo,3.9,Physics,2


Trying to insert another student with the same collection (Name, Major, Year) as ('YODA', 3.5, 'Physics', 3) will let to:

IntegrityError: UNIQUE constraint failed: Students.Name, Students.Major, Students.Year

## Auto-Incrementing Integer Primary Key

We can set an automatic index to be our PK

In [133]:
# Drop the table Students to create a new one

cur.execute("DROP TABLE Students")

<sqlite3.Cursor at 0x7f3de74a6c00>

In [134]:
# Create Students table with auto incrementing pk

statement = '''
CREATE TABLE Students (
StudentId integer NOT NULL PRIMARY KEY AUTOINCREMENT,
Name varchar(255) NOT NULL,
GPA float CHECK (GPA <= 4 AND GPA >= 0),
Major varchar(255) NOT NULL,
Year integer DEFAULT 1
);
'''

cur.execute(statement)

<sqlite3.Cursor at 0x7f3de74a6c00>

In [135]:
# Inserting some data

cur.execute("INSERT INTO Students (Name, GPA, Major, Year) VALUES ('YODA', 3.5, 'Physics', 3)")
cur.execute("INSERT INTO Students (Name, GPA, Major, Year) VALUES ('Vader', 3.2, 'Math', 3)")
cur.execute("INSERT INTO Students (Name, GPA, Major, Year) VALUES ('Kylo', 3.9, 'Physics', 2)")

cur.execute("SELECT * FROM Students")
display_as_table(cur.fetchall(), cur.description) 

Unnamed: 0,StudentId,Name,GPA,Major,Year
0,1,YODA,3.5,Physics,3
1,2,Vader,3.2,Math,3
2,3,Kylo,3.9,Physics,2


In [136]:
# Deleting the student Vader (wich id equals '2') so we see what happens with the column id

cur.execute("DELETE FROM Students WHERE Name = 'Vader'")

cur.execute("SELECT * FROM Students")
display_as_table(cur.fetchall(), cur.description)

Unnamed: 0,StudentId,Name,GPA,Major,Year
0,1,YODA,3.5,Physics,3
1,3,Kylo,3.9,Physics,2


## Trying to create 2 primary keys on a table

```
# statement = '''
# CREATE TABLE Students (
# Name varchar(255) NOT NULL,
# GPA float CHECK (GPA <= 4 AND GPA >= 0),
# Major varchar(255) NOT NULL,
# Year integer DEFAULT 1,
# CONSTRAINT name_major_year_pk PRIMARY KEY (Name),
# CONSTRAINT name_major_year_pk PRIMARY KEY (Year)
# );
# '''

# cur.execute(statement)
```

will let to:

OperationalError: table "Students" has more than one primary key

# Foreing Key

"pragma foreign_keys = on enforces foreign keys. This is usually necessary because by default, SQLite does not enforce foreign keys."

https://renenyffenegger.ch/notes/development/databases/SQLite/sql/pragma/foreign_keys

In [143]:
statement = "pragma foreign_keys = on;"

conn.execute(statement)

cur = conn.cursor()

In [153]:
# Drop the table Students to create a new one

cur.execute("DROP TABLE Students")

<sqlite3.Cursor at 0x7f3de738d260>

In [154]:
statement = '''
CREATE TABLE Students (
Name varchar(255) NOT NULL,
GPA float CHECK (GPA <= 4 AND GPA >= 0),
Major VARCHAR(255) NOT NULL,
Year integer DEFAULT 1,
CONSTRAINT name_pk PRIMARY KEY (Name)
);
'''

cur.execute(statement)

<sqlite3.Cursor at 0x7f3de738d260>

In [155]:
# Inserting some data

cur.execute("INSERT INTO Students (Name, GPA, Major, Year) VALUES ('YODA', 3.5, 'Physics', 3)")
cur.execute("INSERT INTO Students (Name, GPA, Major, Year) VALUES ('Vader', 3.2, 'Math', 3)")
cur.execute("INSERT INTO Students (Name, GPA, Major, Year) VALUES ('Kylo', 3.9, 'Physics', 2)")

cur.execute("SELECT * FROM Students")
display_as_table(cur.fetchall(), cur.description)

Unnamed: 0,Name,GPA,Major,Year
0,YODA,3.5,Physics,3
1,Vader,3.2,Math,3
2,Kylo,3.9,Physics,2


In [156]:
# Create enrollment table

statement = '''
CREATE TABLE Enrollment(
CourseName varchar(255) NOT NULL,
StudentName varchar(255) NOT NULL
);
'''

cur.execute(statement)
cur.execute("INSERT INTO Enrollment VALUES ('Thermodynamics', 'Yoda')")
cur.execute("INSERT INTO Enrollment VALUES ('Thermodynamics', 'Kylo')")
cur.execute("INSERT INTO Enrollment VALUES ('Calculus', 'Vader')")


cur.execute("SELECT * FROM Enrollment")
display_as_table(cur.fetchall(), cur.description)

Unnamed: 0,CourseName,StudentName
0,Thermodynamics,Yoda
1,Thermodynamics,Kylo
2,Calculus,Vader


In [157]:
# Should not be allowed: Adding Student to Enrollment table who does not exist in the Students table

cur.execute("INSERT INTO Enrollment VALUES ('Calculus', 'Rey')")

cur.execute("SELECT * FROM Enrollment")
display_as_table(cur.fetchall(), cur.description)

Unnamed: 0,CourseName,StudentName
0,Thermodynamics,Yoda
1,Thermodynamics,Kylo
2,Calculus,Vader
3,Calculus,Rey


In [158]:
# Should not be allowed: Delete student from Students if it leaves orphans

cur.execute("DELETE FROM Students WHERE Name='Kylo'") # Will leave records of Kylo in the enrollment table

cur.execute("SELECT * FROM Students")
display_as_table(cur.fetchall(), cur.description)

Unnamed: 0,Name,GPA,Major,Year
0,YODA,3.5,Physics,3
1,Vader,3.2,Math,3


Look the linkage errors between the two tables

Fixing the problem by adding foreign keys in the enrollment2 table

In [173]:
# Drop the table Students and enrollment to create a new one

cur.execute("DROP TABLE Students")
cur.execute("DROP TABLE Enrollment")

<sqlite3.Cursor at 0x7f3de738d260>

In [174]:
# Recreating Students

statement = '''
CREATE TABLE Students (
Name varchar(255) NOT NULL,
GPA float CHECK (GPA <= 4 AND GPA >= 0),
Major VARCHAR(255) NOT NULL,
Year integer DEFAULT 1,
CONSTRAINT name_pk PRIMARY KEY (Name)
);
'''

cur.execute(statement)

cur.execute("INSERT INTO Students (Name, GPA, Major, Year) VALUES ('YODA', 3.5, 'Physics', 3)")
cur.execute("INSERT INTO Students (Name, GPA, Major, Year) VALUES ('Vader', 3.2, 'Math', 3)")
cur.execute("INSERT INTO Students (Name, GPA, Major, Year) VALUES ('Kylo', 3.9, 'Physics', 2)")

cur.execute("SELECT * FROM Students")
display_as_table(cur.fetchall(), cur.description)

Unnamed: 0,Name,GPA,Major,Year
0,YODA,3.5,Physics,3
1,Vader,3.2,Math,3
2,Kylo,3.9,Physics,2


In [175]:
# Create enrollment table

statement = '''
CREATE TABLE Enrollment(
CourseName varchar(255) NOT NULL,
StudentName varchar(255) NOT NULL,
FOREIGN KEY (StudentName) REFERENCES Students (Name)
);
''' # connecting the two tables by StudentName (Enrollment) and Name (Students)

cur.execute(statement)

cur.execute("INSERT INTO Enrollment VALUES ('Thermodynamics', 'YODA')")
cur.execute("INSERT INTO Enrollment VALUES ('Thermodynamics', 'Kylo')")
cur.execute("INSERT INTO Enrollment VALUES ('Calculus', 'Vader')")

cur.execute("SELECT * FROM Enrollment")
display_as_table(cur.fetchall(), cur.description)

Unnamed: 0,CourseName,StudentName
0,Thermodynamics,YODA
1,Thermodynamics,Kylo
2,Calculus,Vader


Obs: there is a bug in this enviroment of google colab to use the next 2 commands

In [167]:
# Should not be allowed: Adding Student to Enrollment tale who does not exist

cur.execute("INSERT INTO Enrollment VALUES ('Calculus', 'Rey')")

<sqlite3.Cursor at 0x7f3de738d260>

In [None]:
# Should not be allowed: Delete student from Students if it leaves orphans

cur.execute("DELETE FROM Students WHERE Name='Kylo'") # Will leave records of Kylo in the enrollment table

# VIEWS

"The main difference between view and table is that view is a virtual table based on the result set of an SQL statement, while a table is a database object that consists of rows and columns that store data of a database."

https://pediaa.com/what-is-the-difference-between-view-and-table/#:~:text=The%20main%20difference%20between%20view,store%20data%20of%20a%20database.

"Views are generally used when data is to be accessed infrequently and data in table get updated on frequent basis. On other hand Materialized Views are used when data is to be accessed frequently and data in table not get updated on frequent basis"

https://www.tutorialspoint.com/difference-between-views-and-materialized-views-in-sql#:~:text=Views%20are%20generally%20used%20when,get%20updated%20on%20frequent%20basis.


In [176]:
# Creating a agreggate view

cur.execute('''
CREATE VIEW MajorInfo AS
SELECT Major, COUNT(*) AS NumStudents, AVG(GPA) AS AvgGPA
FROM Students
GROUP BY Major
''')

cur.execute("SELECT * FROM MajorInfo")
display_as_table(cur.fetchall(), cur.description)

Unnamed: 0,Major,NumStudents,AvgGPA
0,Math,1,3.2
1,Physics,2,3.7


It behaves like a table, but it's just a visualization

The select * from VIEW command will replicate the statement that created the VIEW, instead of looking into a physical table

In [177]:
# inserting data into the table that originated the view

cur.execute("INSERT INTO Students VALUES ('R2D2', 2.9, 'Math', 2)")

cur.execute("SELECT * FROM MajorInfo")
display_as_table(cur.fetchall(), cur.description)

# Look how the Avg changes

Unnamed: 0,Major,NumStudents,AvgGPA
0,Math,2,3.05
1,Physics,2,3.7


**Advantages of views**

*Security*

- Each user can be given permission to access the database only through a small set of views that contain the specific data the user is authorized to see, thus restricting the user's access to stored data

*Query Simplicity*

- A view can draw data from several different tables and present it as a single table, turning multi-table queries into single-table queries against the view.

*Structural simplicity*

- Views can give a user a "personalized" view of the database structure, presenting the database as a set of virtual tables that make sense for that user.

*Consistency*

- A view can present a consistent, unchanged image of the structure of the database, even if the underlying source tables are split, restructured, or renamed.

*Data Integrity*

- If data is accessed and entered through a view, the DBMS can automatically check the data to ensure that it meets the specified integrity constraints.

*Logical data independence*

- View can make the application and database tables to a certain extent independent. If there is no view, the application must be based on a table. With the view, the program can be established in view of above, to view the program with a database table to be separated.

**Disadvantages of views**

*Performance*

- Views create the appearance of a table, but the DBMS must still translate queries against the view into queries against the underlying source tables. If the view is defined by a complex, multi-table query then simple queries on the views may take considerable time.

*Update restrictions*

- When a user tries to update rows of a view, the DBMS must translate the request into an update on rows of the underlying source tables. This is possible for simple views, but more complex views are often restricted to read-only.

font: https://www.c-sharpcorner.com/blogs/advantages-and-disadvantages-of-views-in-sql-server1

Resume:

Views advantages:

1. Privacity: You protect the Original Data
2. Simplicity: Turn querys into a simple table

Disadvantages:

1. Complexity: You run the query every time you select the view

Trying to insert into the view will let:

OperationalError: cannot modify MajorInfo because it is a view

In [179]:
# Drop the view

cur.execute('DROP VIEW MajorInfo;')

<sqlite3.Cursor at 0x7f3de738d260>

# DATA TYPES

In [183]:
# Drop the table Students to create a new one

cur.execute("DROP TABLE Students")

<sqlite3.Cursor at 0x7f3de738d260>

In [184]:
# Most commons data types

statement = '''
CREATE TABLE Students (
Name varchar(255),
GPA float,
Major VARCHAR(255),
Year integer,
InState bit
);
'''

# Ex. varchar(255) = char up to 255 characteres, float = 10.6, integer = 5, bit = 1 or 0

cur.execute(statement)

statement = '''
CREATE TABLE Essays(
DateSubmitted date,
TimeSubmitted time,
EssayText text
);
'''

# Ex: Date = '2019-12-31', Time = '03:34:23', Text = str up to 2gb

cur.execute(statement)

<sqlite3.Cursor at 0x7f3de738d260>

## Others data types:
https://www.w3schools.com/sql/sql_datatypes.asp

# INDEXING

In [185]:
# Drop the table Students to create a new one

cur.execute("DROP TABLE Students")

<sqlite3.Cursor at 0x7f3de738d260>

In [186]:
statement = '''
CREATE TABLE Students (
Name varchar(255),
GPA float,
Major VARCHAR(255),
Year integer
);
'''

cur.execute(statement)

<sqlite3.Cursor at 0x7f3de738d260>

In [187]:
# Creating random names, gpas, majors and years

letters = 'abcdefghijklmnopqrstuvwxyz'
majors = ['Art', 'Physics', 'History']

rand_names = [''.join(sample(letters, 10)) for _ in range(100000)] 
rand_gpas = [2+random.random() for _ in range(100000)]
rand_majors = [''.join(sample(majors,1)) for _ in range(100000)]
rand_years = [random.randint(1,4) for _ in range (100000)]

cur.executemany("INSERT INTO Students VALUES (?, ?, ?, ?)", zip(rand_names, rand_gpas, rand_majors, rand_years))

<sqlite3.Cursor at 0x7f3de738d260>

In [188]:
cur.execute("SELECT * FROM Students LIMIT 10")
display_as_table(cur.fetchall(), cur.description)

Unnamed: 0,Name,GPA,Major,Year
0,ckntjgfswl,2.711765,Physics,2
1,zochsutmyw,2.384123,History,2
2,daijkozhvl,2.65135,Physics,1
3,mboskixnay,2.079387,Physics,1
4,iofeqcahsw,2.854959,History,4
5,uovpmxdrkn,2.274681,Art,3
6,jybzfegkoa,2.465856,History,1
7,vzrwbptxmg,2.674073,History,4
8,thgcdbwqlv,2.718086,Physics,1
9,tlnxwzvosy,2.904557,History,1


In [189]:
# In this case, the algoritm will have to go through the entire dataset looking for the where condition as the Name isn't unique

# Measuring time of consult
%%timeit

chosen_student = ''.join(sample(rand_names,1))  # Select a random name to look for in the table
statement = "SELECT * FROM Students WHERE Name = ?"
cur.execute(statement, [chosen_student])

5.35 ms ± 1.32 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [190]:
# Making it faster by adding indexes

# Creating index on the Name column
cur.execute('''
CREATE INDEX idx_name
ON Students (Name);
''')

<sqlite3.Cursor at 0x7f3de738d260>

In [192]:
# there are no apparent (visual) changes

cur.execute("SELECT * FROM Students LIMIT 10")
display_as_table(cur.fetchall(), cur.description)

Unnamed: 0,Name,GPA,Major,Year
0,ckntjgfswl,2.711765,Physics,2
1,zochsutmyw,2.384123,History,2
2,daijkozhvl,2.65135,Physics,1
3,mboskixnay,2.079387,Physics,1
4,iofeqcahsw,2.854959,History,4
5,uovpmxdrkn,2.274681,Art,3
6,jybzfegkoa,2.465856,History,1
7,vzrwbptxmg,2.674073,History,4
8,thgcdbwqlv,2.718086,Physics,1
9,tlnxwzvosy,2.904557,History,1


In [193]:
# In this case, as we've set the index on the Name column, it'll be faster

%%timeit

chosen_student = ''.join(sample(rand_names,1))
statement = "SELECT * FROM Students WHERE Name = ?"
cur.execute(statement, [chosen_student])

11.8 µs ± 252 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


Warning: as a cost of speeding up the Select statement by creating those lookups for the index, the Update, searchings and Deletes on the table will be slower (each time you change the table, you'll have to update the index)

## Indexing with multi columns

In [194]:
chosen_major = ''.join(sample(rand_majors,1))
chosen_year = random.randint(1,4)
statement = "EXPLAIN QUERY PLAN SELECT * FROM Students WHERE Major = ? AND Year = ?"

# EXPLAIN QUERY PLAN is a magical function that shows what the statement will do when you run it

cur.execute(statement, [chosen_major, chosen_year])
display_as_table(cur.fetchall(), cur.description)

Unnamed: 0,selectid,order,from,detail
0,0,0,0,SCAN TABLE Students


In [195]:
# Running time for multi columns

%%timeit

chosen_major = ''.join(sample(rand_majors,1))
chosen_year = random.randint(1,4)
statement = "SELECT * FROM Students WHERE Major = ? AND Year = ?"

cur.execute(statement, [chosen_major, chosen_year])
display_as_table(cur.fetchall(), cur.description)

27.4 ms ± 948 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [196]:
# Adding indexes (with multi columns)

cur.execute('''
CREATE INDEX idx_major_year
ON Students (Major, Year)
''')

<sqlite3.Cursor at 0x7f3de738d260>

In [197]:
# Watching how the querys runs it now

chosen_major = ''.join(sample(rand_majors,1))
chosen_year = random.randint(1,4)
statement = "EXPLAIN QUERY PLAN SELECT * FROM Students WHERE Major = ? AND Year = ?"

# Look how the EXPLAIN changed

cur.execute(statement, [chosen_major, chosen_year])
display_as_table(cur.fetchall(), cur.description)

Unnamed: 0,selectid,order,from,detail
0,0,0,0,SEARCH TABLE Students USING INDEX idx_major_ye...


In [198]:
# Running time for multi columns with index

%%timeit

chosen_major = ''.join(sample(rand_majors,1))
chosen_year = random.randint(1,4)
statement = "SELECT * FROM Students WHERE Major = ? AND Year = ?"

cur.execute(statement, [chosen_major, chosen_year])
display_as_table(cur.fetchall(), cur.description)

19.2 ms ± 235 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


## Other commands on index

In [199]:
# Returning a list of indexes presents on a table

cur.execute("PRAGMA index_list('Students')")
display_as_table(cur.fetchall(), cur.description)

Unnamed: 0,seq,name,unique,origin,partial
0,0,idx_major_year,0,c,0
1,1,idx_name,0,c,0


In [200]:
# Returning wich columns are envolved on a index

cur.execute("PRAGMA index_info('idx_major_year')")
display_as_table(cur.fetchall(), cur.description)

Unnamed: 0,seqno,cid,name
0,0,2,Major
1,1,3,Year


In [201]:
# Drop index

cur.execute('DROP INDEX idx_major_year')
cur.execute("DROP INDEX idx_name")

<sqlite3.Cursor at 0x7f3de738d260>