## Read Data

In [52]:
import pandas as pd
import sqlite3
from tqdm import tqdm

In [58]:
df = pd.read_csv('database/worldcities.csv', 
                 usecols=["id", "country", "iso3", "city_ascii", "population", "iso2", "admin_name", "lat", "lng"],
                #  nrows=100, 
                 encoding='utf-8')

In [59]:
df.head()

Unnamed: 0,city_ascii,lat,lng,country,iso2,iso3,admin_name,population,id
0,Tokyo,35.6897,139.6922,Japan,JP,JPN,Tōkyō,37732000.0,1392685764
1,Jakarta,-6.175,106.8275,Indonesia,ID,IDN,Jakarta,33756000.0,1360771077
2,Delhi,28.61,77.23,India,IN,IND,Delhi,32226000.0,1356872604
3,Guangzhou,23.13,113.26,China,CN,CHN,Guangdong,26940000.0,1156237133
4,Mumbai,19.0761,72.8775,India,IN,IND,Mahārāshtra,24973000.0,1356226629


In [60]:
df.shape

(47868, 9)

columns in database = ["ID", "NAME", "COUNTRYCODE", "DISTRICT", "POPULATION", "CITY", "STATE", "LAT_N", "LONG_W"]

columns in csv = ["id", "country", "iso3", "city_ascii", "population", "iso2", "admin_name", "lat", "lng"]

In [61]:
df.rename(columns={"id":"ID", "country":"NAME", "iso3":"COUNTRYCODE", "city_ascii":"DISTRICT", "population":"POPULATION", "admin_name":"CITY", "iso2":"STATE", "lat":"LAT_N", "lng":"LONG_W"}, inplace=True)

In [62]:
df.head()

Unnamed: 0,DISTRICT,LAT_N,LONG_W,NAME,STATE,COUNTRYCODE,CITY,POPULATION,ID
0,Tokyo,35.6897,139.6922,Japan,JP,JPN,Tōkyō,37732000.0,1392685764
1,Jakarta,-6.175,106.8275,Indonesia,ID,IDN,Jakarta,33756000.0,1360771077
2,Delhi,28.61,77.23,India,IN,IND,Delhi,32226000.0,1356872604
3,Guangzhou,23.13,113.26,China,CN,CHN,Guangdong,26940000.0,1156237133
4,Mumbai,19.0761,72.8775,India,IN,IND,Mahārāshtra,24973000.0,1356226629


In [63]:
df.isna().sum()

DISTRICT         1
LAT_N            0
LONG_W           0
NAME             0
STATE           33
COUNTRYCODE      0
CITY           197
POPULATION     212
ID               0
dtype: int64

### Create Database and Create table then Fetach data

In [64]:
# Connect to the database (replace 'your_database.db' with your file)
connection = sqlite3.connect('CITY_DATABASE.db')

# Create a cursor object
cursor = connection.cursor()

table_name = 'city'  # Replace with your table name
cursor.execute(f'DROP TABLE IF EXISTS {table_name};')

# Execute a query to get all table names
cursor.execute("""
                CREATE TABLE IF NOT EXISTS CITY (
                    ID INT PRINERY KEY NOT NULL,
                    NAME VARCHAR2(17), 
                    COUNTRYCODE VARCHAR2(2), 
                    DISTRICT VARCHAR2(20), 
                    POPULATION NUMBER,
                    CITY VARCHAR2(21), 
                    STATE VARCHAR2(2), 
                    LAT_N NUMBER, 
                    LONG_W NUMBER
               );
               """)

# Commit the changes
connection.commit()

# Close the connection
connection.close()


### Insert data

In [65]:
connection = sqlite3.connect("CITY_DATABASE.db")

cursor = connection.cursor()

# Step 4: Insert data into the table with a progress bar
for index, row in tqdm(df.iterrows(), total=df.shape[0], desc="Inserting Rows"):
    cursor.execute('''
        INSERT INTO city (ID, NAME, COUNTRYCODE, DISTRICT, POPULATION, CITY, STATE, LAT_N, LONG_W) 
                   VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?);
    ''', (row['ID'], row['NAME'], row['COUNTRYCODE'], row['DISTRICT'], row['POPULATION'], row['CITY'], row['STATE'], row['LAT_N'], row['LONG_W']))  # Adjust column names based on your CSV


# Commit the changes and close the connection
connection.commit()
connection.close()

Inserting Rows: 100%|██████████| 47868/47868 [00:00<00:00, 53433.23it/s]


### Featch data from table

In [66]:
# Connect to SQLite database
conn = sqlite3.connect('CITY_DATABASE.db')

# Create a cursor object
cursor = conn.cursor()

# SQL command to select all records from the users table
select_query = 'SELECT * FROM city;'

# Execute the SQL command
cursor.execute(select_query)

# Fetch all results
results = cursor.fetchall()

# Print the results
# for row in results:
#     print(row)

# Optionally convert the results to a pandas DataFrame
df = pd.DataFrame(results, columns=[column[0] for column in cursor.description])
print(df)

# Close the connection
conn.close()


               ID                 NAME COUNTRYCODE          DISTRICT  \
0      1392685764                Japan         JPN             Tokyo   
1      1360771077            Indonesia         IDN           Jakarta   
2      1356872604                India         IND             Delhi   
3      1156237133                China         CHN         Guangzhou   
4      1356226629                India         IND            Mumbai   
...           ...                  ...         ...               ...   
47863  1408979215         Korea, North         PRK        Munha-dong   
47864  1408767958         Korea, North         PRK            Sil-li   
47865  1410001061         Korea, South         KOR              Muan   
47866  1410822139         Korea, South         KOR         Hongseong   
47867  1850037473  U.S. Virgin Islands         VIR  Charlotte Amalie   

       POPULATION            CITY STATE    LAT_N    LONG_W  
0      37732000.0           Tōkyō    JP  35.6897  139.6922  
1      337560

## Function execute queries

In [67]:
def execute_query(
        use_databes: str = "CITY_DATABASE.db",
        my_query: str = "SELECT * FROM city;"
) -> pd.DataFrame:
    try:
        # Connect to SQLite database
        conn = sqlite3.connect(use_databes)

        # Create a cursor object
        cursor = conn.cursor()

        # SQL command to select all records from the users table
        select_query = my_query

        # Execute the SQL command
        cursor.execute(select_query)

        # Fetch all results
        results = cursor.fetchall()

        # Print the results
        # for row in results:
        #     print(row)

        # Optionally convert the results to a pandas DataFrame
        df = pd.DataFrame(results, columns=[column[0] for column in cursor.description])
        print(df)

        # Close the connection
        conn.close()

    except Exception as e:
        print(f"Error: {e}")

In [69]:
# execute_query()

# Questions on SQL

**Q1.** 

Query all columns for all American cities in the CITY table with populations larger than 100000.
The CountryCode for America is USA.
The CITY table is described as follows:

**CITY**

| **Field** | **Type** |
---|---
ID | NUMBER
NAME | VARCHAR2(17)
COUNTRYCODE | VARCHAR2(3)
DISTRICT | VARCHAR2(20)
POPULATION | NUMBER

In [70]:
execute_query("""
    
""")

Error: no such table: city


**Q2.** 

Query the NAME field for all American cities in the CITY table with populations larger than 120000.
The CountryCode for America is USA.
The CITY table is described as follows:

**CITY**

| **Field** | **Type** |
---|---
ID | NUMBER
NAME | VARCHAR2(17)
COUNTRYCODE | VARCHAR2(3)
DISTRICT | VARCHAR2(20)
POPULATION | NUMBER

**Q3.**  
Query all columns (attributes) for every row in the CITY table.  
The CITY table is described as follows:

**CITY**

| **Field** | **Type** |
---|---
ID | NUMBER
NAME | VARCHAR2(17)
COUNTRYCODE | VARCHAR2(3)
DISTRICT | VARCHAR2(20)
POPULATION | NUMBER

**Q4.**  
Query all columns for a city in CITY with the ID 1661.  
The CITY table is described as follows:

**CITY**

| **Field** | **Type** |
---|---
ID | NUMBER
NAME | VARCHAR2(17)
COUNTRYCODE | VARCHAR2(3)
DISTRICT | VARCHAR2(20)
POPULATION | NUMBER

**Q5.**  
Query all attributes of every Japanese city in the CITY table. The COUNTRYCODE for Japan is JPN.  
The CITY table is described as follows:

**CITY**

| **Field** | **Type** |
---|---
ID | NUMBER
NAME | VARCHAR2(17)
COUNTRYCODE | VARCHAR2(3)
DISTRICT | VARCHAR2(20)
POPULATION | NUMBER

**Q6.**  
Query the names of all the Japanese cities in the CITY table. The COUNTRYCODE for Japan is JPN.  
The CITY table is described as follows:

**CITY**

| **Field** | **Type** |
---|---
ID | NUMBER
NAME | VARCHAR2(17)
COUNTRYCODE | VARCHAR2(3)
DISTRICT | VARCHAR2(20)
POPULATION | NUMBER

**Q7.**  
Query a list of CITY and STATE from the STATION table.  
The STATION table is described as follows:  
where LAT_N is the northern latitude and LONG_W is the western longitude.

**STATION**

| **Field** | **Type** |
---|---
ID | NUMBER
CITY | VARCHAR2(21)
STATE | VARCHAR2(2)
LAT_N | NUMBER
LONG_W | NUMBER

**Q8.**  
Query a list of CITY names from STATION for cities that have an even ID number. Print the results in any order, but exclude duplicates from the answer.  
The STATION table is described as follows:  
where LAT_N is the northern latitude and LONG_W is the western longitude.

**STATION**

| **Field** | **Type** |
---|---
ID | NUMBER
CITY | VARCHAR2(21)
STATE | VARCHAR2(2)
LAT_N | NUMBER
LONG_W | NUMBER

**Q9.**  
Find the difference between the total number of CITY entries in the table and the number of distinct CITY entries in the table.  
The STATION table is described as follows:  

**STATION**

| **Field** | **Type** |
---|---
ID | NUMBER
CITY | VARCHAR2(21)
STATE | VARCHAR2(2)
LAT_N | NUMBER
LONG_W | NUMBER


where LAT_N is the northern latitude and LONG_W is the western longitude.  
*For example, if there are three records in the table with CITY values 'New York', 'New York', 'Bengalaru', there are 2 different city names: 'New York' and 'Bengalaru'. The query returns 1, because total number of records - number of unique city names = 3 - 2 = 1.*


**Q10.**  
Query the two cities in STATION with the shortest and longest CITY names, as well as their respective lengths (i.e., number of characters in the name). If there is more than one smallest or largest city, choose the one that comes first when ordered alphabetically.  
The STATION table is described as follows: 

**STATION**

| **Field** | **Type** |
---|---
ID | NUMBER
CITY | VARCHAR2(21)
STATE | VARCHAR2(2)
LAT_N | NUMBER
LONG_W | NUMBER

where LAT_N is the northern latitude and LONG_W is the western longitude.  
*Sample Input:*  
*For example, CITY has four entries: DEF, ABC, PQRS, and WXY.*  

*Sample Output:*  

| CITY | Length |
|------|--------|
| ABC  | 3      |
| PQRS | 4      |

**Hint:**  
*When ordered alphabetically, the CITY names are listed as ABC, DEF, PQRS, and WXY, with lengths and. The longest name is PQRS, but there are options for the shortest named city. Choose ABC, because it comes first alphabetically.*  

**Note:**  
*You can write two separate queries to get the desired output. It need not be a single query.*


**Q11.**  
Query the list of CITY names starting with vowels (i.e., a, e, i, o, or u) from STATION. Your result cannot contain duplicates.  
*Input Format:*  
The STATION table is described as follows:  

**STATION**

| **Field** | **Type** |
---|---
ID | NUMBER
CITY | VARCHAR2(21)
STATE | VARCHAR2(2)
LAT_N | NUMBER
LONG_W | NUMBER

where LAT_N is the northern latitude and LONG_W is the western longitude.

**Q12.**  
Query the list of CITY names ending with vowels (a, e, i, o, u) from STATION. Your result cannot contain duplicates.  
*Input Format:*  
The STATION table is described as follows:  

**STATION**

| **Field** | **Type** |
---|---
ID | NUMBER
CITY | VARCHAR2(21)
STATE | VARCHAR2(2)
LAT_N | NUMBER
LONG_W | NUMBER

where LAT_N is the northern latitude and LONG_W is the western longitude.

**Q13.**  
Query the list of CITY names from STATION that do not start with vowels. Your result cannot contain duplicates.  
*Input Format:*  
The STATION table is described as follows:  

**STATION**

| **Field** | **Type** |
---|---
ID | NUMBER
CITY | VARCHAR2(21)
STATE | VARCHAR2(2)
LAT_N | NUMBER
LONG_W | NUMBER

where LAT_N is the northern latitude and LONG_W is the western longitude.

**Q14.**  
Query the list of CITY names from STATION that do not end with vowels. Your result cannot contain duplicates.  
*Input Format:*  
The STATION table is described as follows:  

**STATION**

| **Field** | **Type** |
---|---
ID | NUMBER
CITY | VARCHAR2(21)
STATE | VARCHAR2(2)
LAT_N | NUMBER
LONG_W | NUMBER

where LAT_N is the northern latitude and LONG_W is the western longitude.

**Q15.**  
Query the list of CITY names from STATION that either do not start with vowels or do not end with vowels. Your result cannot contain duplicates.  
*Input Format:*  
The STATION table is described as follows:  

**STATION**

| **Field** | **Type** |
---|---
ID | NUMBER
CITY | VARCHAR2(21)
STATE | VARCHAR2(2)
LAT_N | NUMBER
LONG_W | NUMBER

where LAT_N is the northern latitude and LONG_W is the western longitude.


**Q16.**  
Query the list of CITY names from STATION that do not start with vowels and do not end with vowels. Your result cannot contain duplicates.  
*Input Format:*  
The STATION table is described as follows:  

**STATION**

| **Field** | **Type** |
---|---
ID | NUMBER
CITY | VARCHAR2(21)
STATE | VARCHAR2(2)
LAT_N | NUMBER
LONG_W | NUMBER

where LAT_N is the northern latitude and LONG_W is the western longitude.
