## Question 1: **Answer**

In [1]:
import pandas as pd
import sqlite3

In [None]:
def add_data_to_database(input_data, db_path="housing_data.db", table_name="housing"):
    """Load a CSV file into a table of a SQLite database.

    Parameters
    ----------
    input_data : str or path-like or file-like
        Anything ``pd.read_csv`` accepts; the CSV to load.
    db_path : str, optional
        SQLite database file to write to. Defaults to ``"housing_data.db"``.
    table_name : str, optional
        Destination table. Defaults to ``"housing"``.

    Notes
    -----
    * An existing table with the same name is replaced, so re-running the
      cell reloads the data instead of failing with "table already exists".
    * Errors are reported with ``print`` rather than raised, so the function
      always returns ``None``.
    """
    try:
        df = pd.read_csv(input_data)
        conn = sqlite3.connect(db_path)
        try:
            # if_exists="replace" keeps the load idempotent across re-runs.
            df.to_sql(table_name, con=conn, index=False, if_exists="replace")
        finally:
            # Always release the file handle, even when the write fails.
            conn.close()
    except Exception as e:
        print(f"Error: {e}")

# Load the housing CSV into the SQLite database.
# NOTE(review): this is a hard-coded absolute path (looks like a Google Colab
# location) - adjust it when running the notebook elsewhere.
data = "/content/housing_dataset.csv"
add_data_to_database(data)

In [3]:
# Load the `sql` IPython extension, which provides the %sql / %%sql magics used below
%load_ext sql

In [4]:
# Connect the SQL magic to the database file written by add_data_to_database.
# The path is relative, so the kernel's working directory must be the same
# one the loader wrote "housing_data.db" into.
%sql sqlite:///housing_data.db

In [5]:
# Fall back to the deprecated default table style for query results
%config SqlMagic.style = '_DEPRECATED_DEFAULT'

In [6]:
%%sql
-- Preview the first five rows of the freshly loaded table
SELECT * FROM housing LIMIT 5;

 * sqlite:///housing_data.db
Done.


State,City,Median_Home_Price_2006,Median_Home_Price_2008,Foreclosure_Rate_2007,Foreclosure_Rate_2009,Unemployment_Rate_2007,Unemployment_Rate_2009,Median_Household_Income_2006,Median_Household_Income_2008
CA,Los Angeles,"$520,000","$430,000",1.50%,7.20%,4.50%,10.30%,"$78,000","$65,000"
FL,Miami,"$380,000","$290,000",2.80%,9.50%,4.20%,11.70%,"$55,000","$48,000"
NV,Las Vegas,"$410,000","$310,000",3.10%,12.40%,5.70%,14.20%,"$62,000","$51,000"
AZ,Phoenix,"$250,000","$200,000",1.20%,6.80%,3.90%,9.60%,"$58,000","$49,000"
CA,San Diego,"$480,000","$390,000",1.80%,8.10%,4.70%,11.00%,"$72,000","$60,000"


## Question 2: **Answer**

In [7]:
%%sql
-- Build a working copy so the raw housing table stays untouched.
-- The copy is dropped first so that this cell can be re-run safely
-- (a plain CREATE TABLE fails once housing_copy already exists).
DROP TABLE IF EXISTS housing_copy;
CREATE TABLE housing_copy AS
SELECT * FROM housing;

 * sqlite:///housing_data.db
Done.


[]

#### 2. Clean the columns (remove the '$', ',' and '%' characters from the columns)

In [8]:
%%sql
-- Strip the formatting characters from the working copy, in place.
-- Running this again is harmless because cleaned values contain nothing left to replace.
UPDATE housing_copy
SET
    -- rate columns lose their percent sign
    Foreclosure_Rate_2007  = REPLACE(Foreclosure_Rate_2007, '%', ''),
    Foreclosure_Rate_2009  = REPLACE(Foreclosure_Rate_2009, '%', ''),
    Unemployment_Rate_2007 = REPLACE(Unemployment_Rate_2007, '%', ''),
    Unemployment_Rate_2009 = REPLACE(Unemployment_Rate_2009, '%', ''),
    -- money columns lose the thousands separator and the dollar sign
    Median_Home_Price_2006       = REPLACE(REPLACE(Median_Home_Price_2006, ',', ''), '$', ''),
    Median_Home_Price_2008       = REPLACE(REPLACE(Median_Home_Price_2008, ',', ''), '$', ''),
    Median_Household_Income_2006 = REPLACE(REPLACE(Median_Household_Income_2006, ',', ''), '$', ''),
    Median_Household_Income_2008 = REPLACE(REPLACE(Median_Household_Income_2008, ',', ''), '$', '');

 * sqlite:///housing_data.db
11 rows affected.


[]

In [9]:
%%sql
-- Inspect the cleaned working copy to confirm the symbols were removed
SELECT * FROM housing_copy;

 * sqlite:///housing_data.db
Done.


State,City,Median_Home_Price_2006,Median_Home_Price_2008,Foreclosure_Rate_2007,Foreclosure_Rate_2009,Unemployment_Rate_2007,Unemployment_Rate_2009,Median_Household_Income_2006,Median_Household_Income_2008
CA,Los Angeles,520000,430000,1.5,7.2,4.5,10.3,78000,65000
FL,Miami,380000,290000,2.8,9.5,4.2,11.7,55000,48000
NV,Las Vegas,410000,310000,3.1,12.4,5.7,14.2,62000,51000
AZ,Phoenix,250000,200000,1.2,6.8,3.9,9.6,58000,49000
CA,San Diego,480000,390000,1.8,8.1,4.7,11.0,72000,60000
FL,Orlando,220000,180000,2.1,8.7,4.1,10.9,52000,44000
CA,San Jose,750000,620000,1.0,5.4,4.3,8.2,95000,80000
NV,Reno,340000,270000,2.4,9.8,5.4,12.9,60000,50000
CA,Sacramento,320000,270000,1.4,7.1,4.8,10.5,65000,56000
FL,Tampa,210000,170000,2.6,9.2,4.0,11.4,50000,42000


## Question 3: **Answer**

In [10]:
%%sql
-- Price decline per city, in dollars and as a percentage of the 2006 price.
-- No GROUP BY here. Nothing is aggregated, and grouping would silently
-- collapse repeated city names into one arbitrary row.
WITH percent_decline AS (
    SELECT
        city,
        (Median_Home_Price_2006 - Median_Home_Price_2008) AS price_decline,
        (((Median_Home_Price_2006 - Median_Home_Price_2008) /
                CAST(Median_Home_Price_2006 AS FLOAT)) * 100)
        AS decline_percentage
    FROM housing_copy
)
-- City with the second highest percentage decline.
-- Every row tied for the top value is filtered out, then the largest
-- remaining row is picked explicitly with ORDER BY and LIMIT instead of
-- relying on SQLite filling bare columns from the MAX() row.
SELECT
    city,
    price_decline,
    ROUND(decline_percentage, 2) AS max_decline
FROM percent_decline
WHERE decline_percentage < (SELECT MAX(decline_percentage)
                            FROM percent_decline)
ORDER BY decline_percentage DESC
LIMIT 1;

 * sqlite:///housing_data.db
Done.


city,price_decline,max_decline
Miami,90000,23.68


## Question 4: **Answer**

In [11]:
%%sql
-- Los Angeles versus Phoenix, showing the dollar drop in median home price
-- and the rise in foreclosure rate, largest foreclosure rise first
SELECT
    city,
    (Median_Home_Price_2006 - Median_Home_Price_2008) AS price_decline,
    (Foreclosure_Rate_2009 - Foreclosure_Rate_2007) AS foreclosure_increase
FROM housing_copy
WHERE city IN ('Los Angeles', 'Phoenix')
ORDER BY foreclosure_increase DESC;

 * sqlite:///housing_data.db
Done.


City,price_decline,foreclosure_increase
Los Angeles,90000,5.7
Phoenix,50000,5.6


## Question 5: **Answer**

In [12]:
%%sql
-- Per-row change in foreclosure rate between 2007 and 2009
WITH rate_change AS (
    SELECT
        State,
        -- change in percentage points
        (Foreclosure_Rate_2009 - Foreclosure_Rate_2007) AS absolute_increase,
        -- change relative to the 2007 rate, as a percentage rounded to 2 decimals
        ROUND(
            ((Foreclosure_Rate_2009 - Foreclosure_Rate_2007) / Foreclosure_Rate_2007) * 100,
            2
        ) AS percentage_increase
    FROM housing_copy
)
-- Keep only the rows with the largest and the smallest percentage increase
SELECT
    State,
    absolute_increase,
    percentage_increase
FROM rate_change
WHERE percentage_increase IN (
    (SELECT MAX(percentage_increase) FROM rate_change),
    (SELECT MIN(percentage_increase) FROM rate_change)
);

 * sqlite:///housing_data.db
Done.


State,absolute_increase,percentage_increase
FL,6.7,239.29
AZ,5.6,466.67


## Question 6: **Answer**

In [13]:
%%sql
-- Rank states by their average rise in foreclosure rate (2007 to 2009).
-- The rank uses the unrounded average, and the displayed value is rounded.
WITH state_foreclosure_changes AS (
    SELECT
        State,
        ROUND(AVG(Foreclosure_Rate_2009 - Foreclosure_Rate_2007), 2)
        AS foreclosure_increase_avg,
        DENSE_RANK() OVER (ORDER BY AVG(Foreclosure_Rate_2009 - Foreclosure_Rate_2007) DESC)
        AS rank
    FROM housing_copy
    GROUP BY State
)
-- Top 3 ranks. DENSE_RANK keeps ties, so more than three states can appear.
-- The explicit ORDER BY matters because row order is undefined without it.
SELECT
    State,
    foreclosure_increase_avg,
    rank
FROM state_foreclosure_changes
WHERE rank <= 3
ORDER BY rank, State;

 * sqlite:///housing_data.db
Done.


State,foreclosure_increase_avg,rank
NV,8.35,1
TX,7.3,2
FL,6.63,3


In [14]:
# %%sql
# DROP TABLE housing_copy;