#### UNDERSTANDING THE DATA
##### Create a schema 'census' and import tables d1 and d2 into it

In [None]:
-- First look at the two imported tables: all rows of each, then the
-- column layout of d1 (SHOW COLUMNS is the long form of DESCRIBE).
SELECT *
FROM census.d1;

SELECT *
FROM census.d2;

SHOW COLUMNS FROM census.d1;

| Field      | Type | Null |
| ---------- | ---- | ---- |
| District   | text | YES  |
| State      | text | YES |
| Area_km2   | text | YES |
| Population | text | YES |

In [None]:
-- Column layout of d2 (SHOW COLUMNS is the long form of DESCRIBE).
SHOW COLUMNS FROM census.d2;

| Field     | Type   | Null |
| --------- | ------ | ---- |
| District  | text   | YES  |
| State     | text   | YES |
| Growth    | text   | YES |
| Sex_Ratio | int    | YES |
| Literacy  | double | YES |

In [None]:
-- Row count, non-NULL district count and distinct district-name count for
-- each table. A distinct count below the row count means some district
-- names occur more than once.

-- d1: 636 rows / 636 non-NULL districts / 630 distinct district names
SELECT COUNT(*)
FROM census.d1;

SELECT COUNT(district)
FROM census.d1;

SELECT COUNT(DISTINCT district)
FROM census.d1;

-- d2: 640 rows / 640 non-NULL districts / 634 distinct district names
SELECT COUNT(*)
FROM census.d2;

SELECT COUNT(district)
FROM census.d2;

SELECT COUNT(DISTINCT district)
FROM census.d2;

#### INVESTIGATING DUPLICATES IN census.d1

In [None]:
-- List every d1 row whose district name occurs more than once.
-- The repeated names are found with GROUP BY / HAVING, so this read-only
-- check no longer has to create and drop a scratch table in the schema.
-- Only the district name is compared: same-named districts that belong to
-- different states are reported here as well.
SELECT *
FROM census.d1
WHERE district IN (
    SELECT district
    FROM census.d1
    GROUP BY district
    HAVING COUNT(*) > 1
)
ORDER BY district;

| District   | State            | Area_km2 | Population |
| ---------- | ---------------- | -------- | ---------- |
| Aurangabad | Maharashtra      | 10,107   | 37,01,282  |
| Aurangabad | Bihar            | 3,305    | 25,40,073  |
| Bijapur    | Karnataka        | 10,498   | 21,77,331  |
| Bijapur    | Chhattisgarh     | 8,530    | 2,55,230   |
| Bilaspur   | Himachal Pradesh | 1,167    | 3,81,956   |
| Bilaspur   | Chhattisgarh     | 8,272    | 26,63,629  |
| Hamirpur   | Uttar Pradesh    | 4,021    | 11,04,285  |
| Hamirpur   | Himachal Pradesh | 1,118    | 4,54,768   |
| Pratapgarh | Uttar Pradesh    | 3,717    | 32,09,141  |
| Pratapgarh | Rajasthan        | 4,449    | 8,67,848   |
| Raigarh    | Maharashtra      | 7,086    | 14,93,984  |
| Raigarh    | Chhattisgarh     | 7,152    | 26,34,200  |

#### INVESTIGATING DUPLICATES IN census.d2

In [None]:
-- List every d2 row whose district name occurs more than once.
-- The repeated names are found with GROUP BY / HAVING, so this read-only
-- check no longer has to create and drop a scratch table in the schema.
-- Only the district name is compared: same-named districts that belong to
-- different states are reported here as well.
SELECT *
FROM census.d2
WHERE district IN (
    SELECT district
    FROM census.d2
    GROUP BY district
    HAVING COUNT(*) > 1
)
ORDER BY district;

| District   | State            | Growth | Sex_Ratio | Literacy |
| ---------- | ---------------- | ------ | --------- | -------- |
| Aurangabad | Maharashtra      | 0      | 923       | 79.02    |
| Aurangabad | Bihar            | 0      | 926       | 70.32    |
| Bijapur    | Karnataka        | 0      | 960       | 67.15    |
| Bijapur    | Chhattisgarh     | 0      | 984       | 40.86    |
| Bilaspur   | Chhattisgarh     | 0      | 971       | 70.78    |
| Bilaspur   | Himachal Pradesh | 0      | 981       | 84.59    |
| Hamirpur   | Uttar Pradesh    | 0      | 861       | 68.77    |
| Hamirpur   | Himachal Pradesh | 0      | 1,095     | 88.15    |
| Pratapgarh | Uttar Pradesh    | 0      | 998       | 70.09    |
| Pratapgarh | Rajasthan        | 0      | 983       | 55.97    |
| Raigarh    | Maharashtra      | 0      | 959       | 83.14    |
| Raigarh    | Chhattisgarh     | 0      | 991       | 73.26    |

#### d1 + d2 = data WITH TYPE CASTING

In [None]:
-- Build census.data: one row per (district, state) pair that exists in both
-- d1 and d2, with the comma-formatted text columns converted to numbers.

-- d2 rows that will NOT be carried into census.data: either no d1 row has
-- the same district + state, or the matching d1 row has no population.
-- Listed explicitly so the exclusion is visible rather than silent.
SELECT
    d2.district,
    d2.state
FROM census.d2 AS d2
LEFT JOIN census.d1 AS d1
    ON  d1.district = d2.district
    AND d1.state    = d2.state
WHERE d1.Population IS NULL
ORDER BY d2.state, d2.district;

DROP TABLE IF EXISTS census.data;
CREATE TABLE census.data AS
SELECT
    d2.district,
    d2.state,
    -- REPLACE strips the thousands separators (e.g. '37,01,282') before the cast
    CAST(REPLACE(d1.Population, ',', '') AS UNSIGNED)     AS Population,
    CAST(REPLACE(d1.Area_km2,   ',', '') AS UNSIGNED)     AS Area,
    CAST(REPLACE(d2.Growth,     ',', '') AS DECIMAL(9,2)) AS Growth,
    d2.Literacy,
    d2.Sex_Ratio
FROM census.d2 AS d2
INNER JOIN census.d1 AS d1
    -- the district name alone is not unique, so the state is part of the key
    ON  d1.district = d2.district
    AND d1.state    = d2.state
-- rows without a population are filtered here instead of being inserted
-- and deleted afterwards
WHERE d1.Population IS NOT NULL
ORDER BY state;

SELECT * FROM census.data;
DESCRIBE census.data;

| Field      | Type            | Null |
| ---------- | --------------- | ---- |
| district   | text            | YES  |
| state      | text            | YES  |
| Population | bigint unsigned | YES  |
| Area       | bigint unsigned | YES  |
| Growth     | decimal(9,2)    | YES  |
| Literacy   | double          | YES  |
| Sex_Ratio  | int             | YES  |


#### Update table with literacy_sex_growth data

In [None]:
-- Adds five derived head-count columns to census.data and fills them from
-- the rate columns already in the table.
--
-- drop_literacy_sex_growth_data() makes the cell re-runnable: it drops
-- whichever of the five columns already exist on census.data, then adds all
-- five back as empty INTEGER columns.
-- The existence checks filter on table_name as well as table_schema, so a
-- same-named column in another census table cannot trigger a DROP COLUMN
-- that would then fail on census.data.
DROP PROCEDURE IF EXISTS drop_literacy_sex_growth_data;
DELIMITER //

CREATE PROCEDURE drop_literacy_sex_growth_data()
BEGIN
    IF EXISTS (
        SELECT *
        FROM information_schema.COLUMNS
        WHERE table_schema = 'census'
          AND table_name   = 'data'
          AND column_name  = 'Literate'
    ) THEN
        ALTER TABLE census.data DROP COLUMN Literate;
    END IF;

    IF EXISTS (
        SELECT *
        FROM information_schema.COLUMNS
        WHERE table_schema = 'census'
          AND table_name   = 'data'
          AND column_name  = 'Illiterate'
    ) THEN
        ALTER TABLE census.data DROP COLUMN Illiterate;
    END IF;

    IF EXISTS (
        SELECT *
        FROM information_schema.COLUMNS
        WHERE table_schema = 'census'
          AND table_name   = 'data'
          AND column_name  = 'Male'
    ) THEN
        ALTER TABLE census.data DROP COLUMN Male;
    END IF;

    IF EXISTS (
        SELECT *
        FROM information_schema.COLUMNS
        WHERE table_schema = 'census'
          AND table_name   = 'data'
          AND column_name  = 'Female'
    ) THEN
        ALTER TABLE census.data DROP COLUMN Female;
    END IF;

    IF EXISTS (
        SELECT *
        FROM information_schema.COLUMNS
        WHERE table_schema = 'census'
          AND table_name   = 'data'
          AND column_name  = 'Prev_Population'
    ) THEN
        ALTER TABLE census.data DROP COLUMN Prev_Population;
    END IF;

    -- one ALTER for all five columns instead of five separate ones
    ALTER TABLE census.data
        ADD COLUMN Literate        INTEGER,
        ADD COLUMN Illiterate      INTEGER,
        ADD COLUMN Male            INTEGER,
        ADD COLUMN Female          INTEGER,
        ADD COLUMN Prev_Population INTEGER;
END//
DELIMITER ;

CALL drop_literacy_sex_growth_data();

-- Each UPDATE below intentionally has no WHERE clause: every row is filled.
-- Order matters: Illiterate and Female are computed as the remainder of the
-- column set by the statement just before them.

-- Literacy is a percentage of the population
UPDATE census.data SET Literate = Literacy / 100 * Population;
UPDATE census.data SET Illiterate = Population - Literate;

-- Sex_Ratio is females per 1000 males, so Population = Male * (1 + Sex_Ratio / 1000)
UPDATE census.data SET Male = Population / (1 + Sex_Ratio / 1000);
UPDATE census.data SET Female = Population - Male;

-- Growth is a percentage, so Population = Prev_Population * (1 + Growth / 100).
-- NULLIF turns a growth of exactly -100 into NULL instead of a division by zero.
UPDATE census.data SET Prev_Population = Population / NULLIF(1 + Growth / 100, 0);

DESCRIBE census.data;

| Field           | Type            | Null |
| --------------- | --------------- | ---- |
| district        | text            | YES  |
| state           | text            | YES |
| Population      | bigint unsigned | YES |
| Area            | bigint unsigned | YES |
| Growth          | decimal(9,2)    | YES |
| Literacy        | double          | YES |
| Sex_Ratio       | int             | YES |
| Literate        | int             | YES |
| Illiterate      | int             | YES |
| Male            | int             | YES |
| Female          | int             | YES |
| Prev_Population | int             | YES |

#### Data Summary

In [None]:
-- MIN / MAX / AVG across all rows of census.data, one result set per column.
-- The select expressions are left unaliased on purpose, so the result
-- headers stay exactly as before.

-- Literacy rate and the literate / illiterate head counts derived from it
SELECT ROUND(MIN(Literacy),2), ROUND(MAX(Literacy),2), ROUND(AVG(Literacy),2)
FROM census.data;

SELECT ROUND(MIN(Literate)), ROUND(MAX(Literate)), ROUND(AVG(Literate))
FROM census.data;

SELECT ROUND(MIN(Illiterate)), ROUND(MAX(Illiterate)), ROUND(AVG(Illiterate))
FROM census.data;

-- Sex ratio and the male / female head counts derived from it
SELECT ROUND(MIN(Sex_Ratio)), ROUND(MAX(Sex_Ratio)), ROUND(AVG(Sex_Ratio))
FROM census.data;

SELECT ROUND(MIN(Male)), ROUND(MAX(Male)), ROUND(AVG(Male))
FROM census.data;

SELECT ROUND(MIN(Female)), ROUND(MAX(Female)), ROUND(AVG(Female))
FROM census.data;

-- Growth rate, previous and current population, and area
SELECT ROUND(MIN(Growth),2), ROUND(MAX(Growth),2), ROUND(AVG(Growth),2)
FROM census.data;

SELECT ROUND(MIN(Prev_Population)), ROUND(MAX(Prev_Population)), ROUND(AVG(Prev_Population))
FROM census.data;

SELECT ROUND(MIN(Population),2), ROUND(MAX(Population),2), ROUND(AVG(Population),2)
FROM census.data;

SELECT ROUND(MIN(Area),2), ROUND(MAX(Area),2), ROUND(AVG(Area),2)
FROM census.data;

| Field           | MIN     | MAX      | AVG     |
| --------------- | ------- | -------- | ------- |
| Literacy        | 36.1    | 97.91    | 72.3    |
| Literate        | 5131    | 9349143  | 1376107 |
| Illiterate      | 891     | 2373382  | 517338  |
|                 |         |          |         |
| Sex Ratio       | 534     | 1184     | 946     |
| Male            | 4415    | 5864341  | 974534  |
| Female          | 3589    | 5195807  | 918911  |
|                 |         |          |         |
| Growth          | \-21.57 | 131.81   | 19.24   |
| Prev_Population | 7272    | 8934114  | 1608172 |
| Population      | 8004    | 11060148 | 1893445 |
| Area            | 9       | 45674    | 4955.9  |

#### INDIA's DEMOGRAPHICS

In [None]:
-- Country-wide roll-up of census.data (single result row).
-- "Avg_*" columns are plain averages of the per-row values.
-- "Act_*" columns are recomputed from the summed head counts, so larger
-- rows weigh more.
SELECT
    -- literacy
    ROUND(AVG(Literacy), 2)                             AS Avg_Literacy_rate_India,
    ROUND(SUM(Literate) / SUM(Population) * 100, 2)     AS Act_literacy_rate_India,
    ROUND(SUM(Population), 2)                           AS Tot_Pop_India,
    ROUND(AVG(Literate))                                AS Avg_Literate_India,
    ROUND(AVG(Illiterate))                              AS Avg_Illiterate_India,

    -- sex ratio (females per 1000 males)
    ROUND(AVG(Sex_Ratio))                               AS Avg_Sex_Ratio_India,
    ROUND(SUM(Female) / SUM(Male) * 1000, 2)            AS Act_Sex_Ratio_India,
    ROUND(AVG(Male))                                    AS Avg_Male_India,
    ROUND(AVG(Female))                                  AS Avg_Female_India,

    -- area, density (population per unit of Area) and growth
    ROUND(SUM(Prev_Population))                         AS Tot_Prev_Pop_India,
    ROUND(SUM(Area), 2)                                 AS Tot_Area_India,
    ROUND(SUM(Population) / SUM(Area), 2)               AS density_India,
    ROUND(SUM(Prev_Population) / SUM(Area), 2)          AS Prev_density_India,
    ROUND(AVG(Growth), 2)                               AS Avg_growth_rate_India,
    ROUND(
        ((SUM(Population) - SUM(Prev_Population)) / SUM(Prev_Population)) * 100,
        2
    )                                                   AS Act_growth_rate_India
FROM census.data;

| Avg_Literacy_rate_India | Act_literacy_rate_India | Tot_Pop_India | Avg_Literate_India | Avg_Illiterate_India | Avg_Sex_Ratio_India | Act_Sex_Ratio_India | Avg_Male_India | Avg_Female_India | Tot_Prev_Pop_India | Tot_Area_India | density_India | Prev_density_India | Avg_growth_rate_India | Act_growth_rate_India |
| ----------------------- | ----------------------- | ------------- | ------------------ | -------------------- | ------------------- | ------------------- | -------------- | ---------------- | ------------------ | -------------- | ------------- | ------------------ | --------------------- | --------------------- |
| 72.3                    | 72.68                   | 1.2E+09       | 1376107            | 517338               | 946                 | 942.92              | 974534         | 918911           | 1.02E+09           | 3146998        | 382.06        | 324.5              | 19.24                 | 17.74                 |

#### DEMOGRAPHICS BY STATE

In [None]:
-- Same roll-up as the country-wide query, but one result row per state.
-- "Avg_*" columns are plain averages of the state's rows.
-- "Act_*" columns are recomputed from the state's summed head counts.
SELECT
    state,

    -- literacy
    ROUND(AVG(Literacy), 2)                             AS Avg_Literacy_rate,
    ROUND(SUM(Literate) / SUM(Population) * 100, 2)     AS Act_literacy_rate,
    ROUND(SUM(Population), 2)                           AS Tot_Pop,
    ROUND(AVG(Literate))                                AS Avg_Literate,
    ROUND(AVG(Illiterate))                              AS Avg_Illiterate,

    -- sex ratio (females per 1000 males)
    ROUND(AVG(Sex_Ratio))                               AS Avg_Sex_Ratio,
    ROUND(SUM(Female) / SUM(Male) * 1000, 2)            AS Act_Sex_Ratio,
    ROUND(AVG(Male))                                    AS Avg_Male,
    ROUND(AVG(Female))                                  AS Avg_Female,

    -- area, density (population per unit of Area) and growth
    ROUND(SUM(Prev_Population))                         AS Tot_Prev_Pop,
    ROUND(SUM(Area), 2)                                 AS Tot_Area,
    ROUND(SUM(Population) / SUM(Area), 2)               AS density,
    ROUND(SUM(Prev_Population) / SUM(Area), 2)          AS Prev_density,
    ROUND(AVG(Growth), 2)                               AS Avg_growth_rate,
    ROUND(
        ((SUM(Population) - SUM(Prev_Population)) / SUM(Prev_Population)) * 100,
        2
    )                                                   AS Act_growth_rate
FROM census.data
GROUP BY state
ORDER BY state;

| State                       | Avg_Literacy_rate       | Act_literacy_rate       | Tot_Pop       | Avg_Literate       | Avg_Illiterate       | Avg_Sex_Ratio       | Act_Sex_Ratio       | Avg_Male       | Avg_Female       | Tot_Prev_Pop       | Tot_Area       | density       | Prev_density       | Avg_growth_rate       | Act_growth_rate       |
| --------------------------- | ----------------------- | ----------------------- | ------------- | ------------------ | -------------------- | ------------------- | ------------------- | -------------- | ---------------- | ------------------ | -------------- | ------------- | ------------------ | --------------------- | --------------------- |
| Andaman And Nicobar Islands | 83.7                    | 86.61                   | 380581        | 109874             | 16987                | 858                 | 875.98              | 67623          | 59237            | 356161             | 8249           | 46.14         | 43.18              | 0.6                   | 6.86                  |
| Andhra Pradesh              | 66.29                   | 67                      | 84580777      | 2463986            | 1213439              | 995                 | 992.97              | 1845201        | 1832224          | 76209715           | 275045         | 307.52        | 277.08             | 10.92                 | 10.98                 |
| Arunachal Pradesh           | 63.86                   | 65.24                   | 1383727       | 56420              | 30063                | 920                 | 938.18              | 44621          | 41862            | 1097968            | 83743          | 16.52         | 13.11              | 27.81                 | 26.03                 |
| Assam                       | 72.25                   | 72.05                   | 31205576      | 832756             | 323006               | 958                 | 957.71              | 590365         | 565397           | 26655449           | 78438          | 397.84        | 339.83             | 16.52                 | 17.07                 |
| Bihar                       | 61.76                   | 61.71                   | 1.04E+08      | 1690433            | 1049026              | 918                 | 917.9               | 1428361        | 1311099          | 82998803           | 94163          | 1105.52       | 881.44             | 25.23                 | 25.42                 |
| Chandigarh                  | 86.05                   | 86.05                   | 1055450       | 908215             | 147235               | 818                 | 818                 | 580556         | 474894           | 900631             | 114            | 9258.33       | 7900.27            | 17.19                 | 17.19                 |
| Chhattisgarh                | 65.84                   | 70.33                   | 26685414      | 1042602            | 439921               | 995                 | 990.62              | 744754         | 737769           | 21799849           | 135258         | 197.29        | 161.17             | 20.04                 | 22.41                 |
| Dadra and Nagar Haveli      | 76.24                   | 76.24                   | 343709        | 262044             | 81665                | 774                 | 774                 | 193748         | 149961           | 220496             | 491            | 700.02        | 449.08             | 55.88                 | 55.88                 |
| Daman and Diu               | 85.76                   | 87.08                   | 243247        | 105914             | 15710                | 783                 | 618.8               | 75132          | 46492            | 158207             | 111            | 2191.41       | 1425.29            | 42.74                 | 53.75                 |
| Delhi                       | 86.56                   | 85.76                   | 8172376       | 778695             | 129347               | 866                 | 862.07              | 487652         | 420390           | 6659209            | 8077           | 1011.81       | 824.47             | 13.89                 | 22.72                 |
| Goa                         | 88.58                   | 88.7                    | 1458545       | 646868             | 82405                | 975                 | 973.04              | 369620         | 359653           | 1347647            | 3702           | 393.99        | 364.03             | 8.29                  | 8.23                  |
| Gujarat                     | 76.39                   | 77.91                   | 60439692      | 1811057            | 513546               | 938                 | 919.19              | 1211242        | 1113361          | 50670953           | 196244         | 307.98        | 258.2              | 17.08                 | 19.28                 |
| Haryana                     | 75.36                   | 75.43                   | 25351462      | 910642             | 296570               | 880                 | 878.56              | 642627         | 564585           | 21144594           | 44212          | 573.41        | 478.25             | 20.49                 | 19.9                  |
| Himachal Pradesh            | 81.75                   | 82.77                   | 6864602       | 473465             | 98585                | 953                 | 971.45              | 290168         | 281883           | 6077952            | 55673          | 123.3         | 109.17             | 11.34                 | 12.94                 |
| Jammu and Kashmir           | 65.38                   | 66.91                   | 12541302      | 381416             | 188643               | 883                 | 888.58              | 301846         | 268214           | 10143703           | 101387         | 123.7         | 100.05             | 24.27                 | 23.64                 |
| Jharkhand                   | 64.74                   | 66.25                   | 32988134      | 910546             | 463959               | 956                 | 948.4               | 705453         | 669053           | 26945913           | 79716          | 413.82        | 338.02             | 23.78                 | 22.42                 |
| Karnataka                   | 73.66                   | 75.25                   | 61095297      | 1532498            | 504012               | 984                 | 973.04              | 1032169        | 1004341          | 52850847           | 191791         | 318.55        | 275.56             | 11.86                 | 15.6                  |
| Kerala                      | 93.69                   | 93.98                   | 33406061      | 2242540            | 143608               | 1080                | 1084.3              | 1144821        | 1241327          | 31841402           | 38852          | 859.83        | 819.56             | 4.13                  | 4.91                  |
| Lakshadweep                 | 91.85                   | 91.85                   | 64473         | 59218              | 5255                 | 946                 | 946                 | 33131          | 31342            | 60652              | 30             | 2149.1        | 2021.73            | 6.3                   | 6.3                   |
| Madhya Pradesh              | 67.81                   | 69.4                    | 69443702      | 1004016            | 442727               | 935                 | 929.87              | 749659         | 697085           | 57744814           | 292875         | 237.11        | 197.17             | 20.16                 | 20.26                 |
| Maharashtra                 | 80.97                   | 82.27                   | 1.11E+08      | 2614552            | 563566               | 948                 | 929.13              | 1647434        | 1530683          | 95923519           | 307647         | 361.56        | 311.8              | 13.95                 | 15.96                 |
| Manipur                     | 76.36                   | 76.92                   | 2855794       | 244060             | 73251                | 976                 | 985.04              | 159851         | 157459           | 2293890            | 22327          | 127.91        | 102.74             | 25.54                 | 24.5                  |
| Meghalaya                   | 73.23                   | 74.32                   | 2966889       | 314993             | 108848               | 980                 | 988.77              | 213118         | 210724           | 2318863            | 22429          | 132.28        | 103.39             | 30.39                 | 27.95                 |
| Mizoram                     | 89.36                   | 91.13                   | 1097206       | 124991             | 12160                | 966                 | 975.75              | 69417          | 67734            | 888561             | 21081          | 52.05         | 42.15              | 24.35                 | 23.48                 |
| Nagaland                    | 78.4                    | 79.34                   | 1978502       | 142705             | 37159                | 934                 | 930.67              | 93162          | 86702            | 1047139            | 16579          | 119.34        | 63.16              | 82.28                 | 88.94                 |
| Orissa                      | 70.84                   | 72.63                   | 41974218      | 1016227            | 382914               | 984                 | 978.77              | 707075         | 692066           | 36804447           | 155707         | 269.57        | 236.37             | 14.25                 | 14.05                 |
| Puducherry                  | 87.46                   | 85.85                   | 1247953       | 267838             | 44151                | 1075                | 1037.12             | 153152         | 158837           | 974363             | 490            | 2546.84       | 1988.5             | 34.3                  | 28.08                 |
| Punjab                      | 74.2                    | 75.52                   | 26748710      | 1063225            | 344602               | 898                 | 895.52              | 742714         | 665113           | 23612032           | 49268          | 542.92        | 479.26             | 12.66                 | 13.28                 |
| Rajasthan                   | 64.6                    | 66.04                   | 68548437      | 1371738            | 705488               | 930                 | 928.13              | 1077326        | 999899           | 56507161           | 342239         | 200.29        | 165.11             | 21.13                 | 21.31                 |
| Sikkim                      | 81.09                   | 80.9                    | 6682899       | 1802105            | 425528               | 852                 | 852.34              | 1202608        | 1025025          | 6028100            | 372            | 17964.78      | 16204.57           | 11.3                  | 10.86                 |
| Tamil Nadu                  | 79.34                   | 80.08                   | 72147030      | 1805485            | 449110               | 999                 | 996.35              | 1129357        | 1125238          | 62405691           | 130060         | 554.72        | 479.82             | 14.62                 | 15.61                 |
| Tripura                     | 86.64                   | 87.2                    | 3673917       | 800901             | 117579               | 958                 | 959.88              | 468641         | 449838           | 3199223            | 10486          | 350.36        | 305.09             | 16.75                 | 14.84                 |
| Uttar Pradesh               | 67.39                   | 67.51                   | 1.98E+08      | 1909716            | 919086               | 910                 | 912.87              | 1478827        | 1349975          | 1.65E+08           | 237907         | 832.33        | 692.01             | 20.11                 | 20.28                 |
| Uttarakhand                 | 79.64                   | 78.77                   | 10086292      | 611127             | 164742               | 1010                | 963.16              | 395214         | 380654           | 8489288            | 53483          | 188.59        | 158.73             | 13.06                 | 18.81                 |
| West Bengal                 | 74.86                   | 76.12                   | 91276115      | 3656599            | 1147407              | 950                 | 949.91              | 2463705        | 2340301          | 80177140           | 88752          | 1028.44       | 903.38             | 14.13                 | 13.84                 |

#### TOP 3 & BOTTOM 3 districts by literacy rate

In [None]:
-- The three rows with the highest and the three with the lowest literacy rate.
-- ORDER BY + LIMIT inside each parenthesised branch only selects the rows;
-- it does not fix the order of the combined result, so the ORDER BY after
-- the UNION is what sorts the output.
-- state and district act as tie-breakers, so equal literacy values always
-- resolve the same way.
-- UNION (not UNION ALL) keeps a row from appearing twice if the table has
-- fewer than six rows.
(
    SELECT *
    FROM census.data
    ORDER BY Literacy DESC, state, district
    LIMIT 3
)
UNION
(
    SELECT *
    FROM census.data
    ORDER BY Literacy ASC, state, district
    LIMIT 3
)
ORDER BY Literacy DESC, state, district;

| district | state          | Population | Area | Growth | Literacy | Sex_Ratio | Literate | Illiterate | Male   | Female | Prev_Population |
| ----------- | -------------- | ---------- | ---- | ------ | -------- | --------- | -------- | ---------- | ------ | ------ | --------------- |
| Serchhip    | Mizoram        | 64937      | 1421 | 20.56  | 97.91    | 977       | 63580    | 1357       | 32846  | 32091  | 53863           |
| Aizawl      | Mizoram        | 400309     | 3576 | 22.92  | 97.89    | 1009      | 391862   | 8447       | 199258 | 201051 | 325666          |
| Mahe        | Puducherry     | 41816      | 9    | 13.54  | 97.87    | 1184      | 40925    | 891        | 19147  | 22669  | 36829           |
| Alirajpur   | Madhya Pradesh | 728999     | 3182 | 19.45  | 36.1     | 1011      | 263169   | 465830     | 362506 | 366493 | 610296          |
| Bijapur     | Chhattisgarh   | 255230     | 8530 | 8.78   | 40.86    | 984       | 104287   | 150943     | 128644 | 126586 | 234630          |
| Dantewada   | Chhattisgarh   | 533638     | 8298 | 12.08  | 42.12    | 1020      | 224768   | 308870     | 264177 | 269461 | 476122          |

##### Similarly, the TOP 3 & BOTTOM 3 districts can be found by growth and by sex ratio