In [1]:
import os
import requests
import csv
from time import sleep

In [None]:
import requests
import time
import json

# Endpoint that test_api() sends its GET requests to.
base_url = "https://dackkms.gov.in/Account/API/kKMS_QueryData.aspx"

def test_api(state, district, month, year, timeout=30):
    """Query the endpoint for one state/district/month/year combination.

    Args:
        state: Integer state number; zero-padded to two digits for ``StateCD``.
        district: Integer district number within the state; zero-padded to two
            digits and appended to the state code to form ``DistrictCd``.
        month: Value sent as the ``Month`` query parameter.
        year: Value sent as the ``Year`` query parameter.
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot block the whole scan indefinitely.

    Returns:
        ``(status_code, parsed_json)`` when the request completes and the body
        parses as JSON, otherwise ``(0, error_message)``.
    """
    state_code = f"{state:02d}"
    district_code = f"{state_code}{district:02d}"
    params = {
        "StateCD": state_code,
        "DistrictCd": district_code,
        "Month": month,
        "Year": year
    }
    try:
        response = requests.get(base_url, params=params, timeout=timeout)
        return response.status_code, response.json()
    except (requests.RequestException, ValueError) as e:
        # A non-JSON body makes response.json() raise a ValueError subclass;
        # on older requests releases that error is not a RequestException,
        # so it is caught explicitly here.
        return 0, str(e)

def extract_info(content):
    """Pull the state and district names out of a parsed API response.

    Args:
        content: Parsed JSON response (a dict).

    Returns:
        ``(state_name, district_name)`` taken from the first record of
        ``content['data']`` when the response reports ``ResponseCode == '1'``
        and ``Response == 'Data Found'``; ``(None, None)`` otherwise. A name
        missing from the first record is returned as ``None``.
    """
    # .get() instead of indexing: a payload without the expected keys must
    # count as "no data" rather than abort the caller's scan with a KeyError.
    if content.get('ResponseCode') != '1' or content.get('Response') != 'Data Found':
        return None, None

    records = content.get('data')
    if not isinstance(records, list) or not records:
        return None, None

    first = records[0]
    if not isinstance(first, dict):
        return None, None
    return first.get('StateName'), first.get('DistrictName')

def save_intermediate_results(states, districts, last_processed_state):
    """Checkpoint both lookup tables as JSON files in the working directory.

    Args:
        states: Mapping to write to ``states_intermediate_<N>.json``.
        districts: Mapping to write to ``districts_intermediate_<N>.json``.
        last_processed_state: Value used as ``<N>`` in both file names.
    """
    states_path = f'states_intermediate_{last_processed_state}.json'
    districts_path = f'districts_intermediate_{last_processed_state}.json'

    with open(states_path, 'w') as states_file:
        json.dump(states, states_file, indent=2)

    with open(districts_path, 'w') as districts_file:
        json.dump(districts, districts_file, indent=2)

    print(f"\nSaved intermediate results up to state {last_processed_state}")

def main():
    """Brute-force scan of state/district codes to learn which ones exist.

    Probes state numbers 1-99 and, for each, district numbers 1-99 through
    ``test_api`` (always asking for January 2023). Every combination that
    yields both a state name and a district name is recorded. Both tables are
    checkpointed after every third state number and written to
    ``states_final.json`` / ``districts_final.json`` when the scan ends.
    The function sleeps 0.5 s after each request and 15 s after each state.
    """
    states, districts = {}, {}
    request_count = 0
    hit_count = 0

    print("Starting the discovery process...")

    for state in range(1, 100):  # state codes 01..99
        state_code = f"{state:02d}"
        # Every probed state gets an entry, even if no district is found.
        state_districts = districts[state_code] = {}
        print(f"\nChecking state code: {state_code}")

        for district in range(1, 100):  # at most 99 districts per state assumed
            request_count += 1
            status, content = test_api(state, district, 1, 2023)  # January 2023

            if status != 200 or not isinstance(content, dict):
                print(f"Failed or empty response for state code {state_code}, district number {district:02d}")
            else:
                state_name, district_name = extract_info(content)

                if not (state_name and district_name):
                    print(f"No data for state code {state_code}, district number {district:02d}")
                else:
                    hit_count += 1
                    if state_code not in states:
                        states[state_code] = state_name
                        print(f"Discovered new state: {state_name} ({state_code})")

                    district_code = f"{state_code}{district:02d}"
                    state_districts[district_code] = district_name

                    print(f"Found: State {state_name} ({state_code}), District {district_name} ({district_code})")

            # Short pause between requests to avoid overwhelming the server.
            time.sleep(0.5)

            # Progress line after every tenth request.
            if not request_count % 10:
                print(f"Progress: {request_count} requests made, {hit_count} successful extractions")

        # Checkpoint after every third state number.
        if state % 3 == 0:
            save_intermediate_results(states, districts, state)

        # Longer pause between states.
        print(f"Finished processing state code {state_code}. Taking a 15-second break...")
        time.sleep(15)

    # Final results.
    with open('states_final.json', 'w') as states_file:
        json.dump(states, states_file, indent=2)
    print("\nSaved final states data to states_final.json")

    with open('districts_final.json', 'w') as districts_file:
        json.dump(districts, districts_file, indent=2)
    print("Saved final districts data to districts_final.json")

    district_total = sum(len(found) for found in districts.values())
    print(f"\nDiscovery process completed.")
    print(f"Total requests made: {request_count}")
    print(f"Successful extractions: {hit_count}")
    print(f"States discovered: {len(states)}")
    print(f"Districts discovered: {district_total}")

# Start the full scan when this code runs as the main module; inside a
# notebook kernel __name__ is "__main__", so executing the cell starts it.
if __name__ == "__main__":
    main()

In [None]:
import requests
import time
import json
import os

# Endpoint that test_api() sends its GET requests to.
base_url = "https://dackkms.gov.in/Account/API/kKMS_QueryData.aspx"

def test_api(state, district, month, year, timeout=30):
    """Query the endpoint for one state/district/month/year combination.

    Args:
        state: Integer state number; zero-padded to two digits for ``StateCD``.
        district: Integer district number within the state; zero-padded to two
            digits and appended to the state code to form ``DistrictCd``.
        month: Value sent as the ``Month`` query parameter.
        year: Value sent as the ``Year`` query parameter.
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot block the whole scan indefinitely.

    Returns:
        ``(status_code, parsed_json)`` when the request completes and the body
        parses as JSON, otherwise ``(0, error_message)``.
    """
    state_code = f"{state:02d}"
    district_code = f"{state_code}{district:02d}"
    params = {
        "StateCD": state_code,
        "DistrictCd": district_code,
        "Month": month,
        "Year": year
    }
    try:
        response = requests.get(base_url, params=params, timeout=timeout)
        return response.status_code, response.json()
    except (requests.RequestException, ValueError) as e:
        # A non-JSON body makes response.json() raise a ValueError subclass;
        # on older requests releases that error is not a RequestException,
        # so it is caught explicitly here.
        return 0, str(e)

def extract_info(content):
    """Pull the state and district names out of a parsed API response.

    Args:
        content: Parsed JSON response (a dict).

    Returns:
        ``(state_name, district_name)`` taken from the first record of
        ``content['data']`` when the response reports ``ResponseCode == '1'``
        and ``Response == 'Data Found'``; ``(None, None)`` otherwise. A name
        missing from the first record is returned as ``None``.
    """
    # .get() instead of indexing: a payload without the expected keys must
    # count as "no data" rather than abort the caller's scan with a KeyError.
    if content.get('ResponseCode') != '1' or content.get('Response') != 'Data Found':
        return None, None

    records = content.get('data')
    if not isinstance(records, list) or not records:
        return None, None

    first = records[0]
    if not isinstance(first, dict):
        return None, None
    return first.get('StateName'), first.get('DistrictName')

def save_intermediate_results(states, districts, last_processed_state):
    """Checkpoint both lookup tables as JSON files in the working directory.

    Args:
        states: Mapping to write to ``states_intermediate_<N>.json``.
        districts: Mapping to write to ``districts_intermediate_<N>.json``.
        last_processed_state: Value used as ``<N>`` in both file names.
    """
    states_path = f'states_intermediate_{last_processed_state}.json'
    districts_path = f'districts_intermediate_{last_processed_state}.json'

    with open(states_path, 'w') as states_file:
        json.dump(states, states_file, indent=2)

    with open(districts_path, 'w') as districts_file:
        json.dump(districts, districts_file, indent=2)

    print(f"\nSaved intermediate results up to state {last_processed_state}")

def load_intermediate_results(last_processed_state):
    """Read back the checkpoint pair written for a given state number.

    Args:
        last_processed_state: Value identifying the checkpoint; it is the
            ``<N>`` in ``states_intermediate_<N>.json`` and
            ``districts_intermediate_<N>.json``.

    Returns:
        ``(states, districts)`` as parsed from the two JSON files.
    """
    states_path = f'states_intermediate_{last_processed_state}.json'
    districts_path = f'districts_intermediate_{last_processed_state}.json'

    with open(states_path, 'r') as states_file:
        states = json.loads(states_file.read())

    with open(districts_path, 'r') as districts_file:
        districts = json.loads(districts_file.read())

    return states, districts

def find_last_processed_state():
    """Return the highest state number that has a states checkpoint file.

    Scans the current working directory for files named
    ``states_intermediate_<N>.json`` where ``<N>`` is a decimal number.

    Returns:
        The largest ``<N>`` found, or 0 when no such file exists.
    """
    prefix, suffix = 'states_intermediate_', '.json'
    checkpoints = []
    for name in os.listdir():
        if not (name.startswith(prefix) and name.endswith(suffix)):
            continue
        tag = name[len(prefix):-len(suffix)]
        # Skip stray files whose suffix is not a number instead of letting
        # int() raise.
        if tag.isdecimal():
            checkpoints.append(int(tag))
    # Compare as integers: comparing the file names as strings would rank
    # "states_intermediate_9.json" above "states_intermediate_12.json".
    return max(checkpoints, default=0)

def main(start_state=None):
    """Scan state/district codes, optionally resuming from a checkpoint.

    Args:
        start_state: First state number to probe. When ``None``, the scan
            resumes after the checkpoint reported by
            ``find_last_processed_state`` and reloads that checkpoint's
            tables (or starts at 1 with empty tables if there is none).
            When given explicitly, the scan starts there with EMPTY tables;
            no checkpoint is loaded, so the files written by this run
            contain only what this run finds.

    Probes district numbers 1-99 for every state number up to 99 through
    ``test_api`` (always asking for January 2023), checkpoints after every
    third state number, and writes ``states_final.json`` /
    ``districts_final.json`` at the end. Sleeps 0.5 s after each request and
    15 s after each state.
    """
    if start_state is not None:
        print(f"Starting from specified state {start_state}")
        states, districts = {}, {}
    else:
        checkpoint = find_last_processed_state()
        start_state = checkpoint + 1
        if checkpoint > 0:
            print(f"Resuming from state {start_state}")
            states, districts = load_intermediate_results(checkpoint)
        else:
            print("Starting new discovery process")
            states, districts = {}, {}

    request_count = 0
    hit_count = 0

    for state in range(start_state, 100):  # state codes start_state..99
        state_code = f"{state:02d}"
        # Reuse an entry loaded from a checkpoint, otherwise create one.
        state_districts = districts.setdefault(state_code, {})
        print(f"\nChecking state code: {state_code}")

        for district in range(1, 100):  # at most 99 districts per state assumed
            request_count += 1
            status, content = test_api(state, district, 1, 2023)  # January 2023

            if status != 200 or not isinstance(content, dict):
                print(f"Failed or empty response for state code {state_code}, district number {district:02d}")
            else:
                state_name, district_name = extract_info(content)

                if not (state_name and district_name):
                    print(f"No data for state code {state_code}, district number {district:02d}")
                else:
                    hit_count += 1
                    if state_code not in states:
                        states[state_code] = state_name
                        print(f"Discovered new state: {state_name} ({state_code})")

                    district_code = f"{state_code}{district:02d}"
                    state_districts[district_code] = district_name

                    print(f"Found: State {state_name} ({state_code}), District {district_name} ({district_code})")

            # Short pause between requests to avoid overwhelming the server.
            time.sleep(0.5)

            # Progress line after every tenth request.
            if not request_count % 10:
                print(f"Progress: {request_count} requests made, {hit_count} successful extractions")

        # Checkpoint after every third state number.
        if state % 3 == 0:
            save_intermediate_results(states, districts, state)

        # Longer pause between states.
        print(f"Finished processing state code {state_code}. Taking a 15-second break...")
        time.sleep(15)

    # Final results.
    with open('states_final.json', 'w') as states_file:
        json.dump(states, states_file, indent=2)
    print("\nSaved final states data to states_final.json")

    with open('districts_final.json', 'w') as districts_file:
        json.dump(districts, districts_file, indent=2)
    print("Saved final districts data to districts_final.json")

    district_total = sum(len(found) for found in districts.values())
    print(f"\nDiscovery process completed.")
    print(f"Total requests made: {request_count}")
    print(f"Successful extractions: {hit_count}")
    print(f"States discovered: {len(states)}")
    print(f"Districts discovered: {district_total}")

# Start the scan at state number 16. Because start_state is passed explicitly,
# main() begins with empty tables and does not load any checkpoint.
# NOTE(review): results for states below 16 are therefore absent from the
# files this run writes — confirm that is intended.
if __name__ == "__main__":
    main(start_state=16)

# Extracting data for the last 5 years

In [1]:
import os
import requests
import csv
from time import sleep
import json

In [4]:
# Load the state-code -> state-name mapping written by the discovery run.
with open('states_final.json', 'r') as f:
    state_codes = json.load(f)

In [19]:
# Remove state code '01' from the mapping; the recorded output shows the
# removed value is 'ANDHRA PRADESH'.
# NOTE(review): this cell's execution count (19) is later than the filter and
# display cells below, whose recorded outputs still contain '01'. In a
# top-to-bottom run this pop happens first and drops Andhra Pradesh from the
# extraction even though the filter list keeps '01' — confirm the intent.
state_codes.pop('01')

'ANDHRA PRADESH'

In [5]:
# Load the per-state district mapping (state code -> {district code -> name})
# written by the discovery run.
with open('districts_final.json', 'r') as f:
    district_codes = json.load(f)

In [10]:
# Restrict both lookup tables to the south Indian states. According to the
# state mapping these codes are Andhra Pradesh, Karnataka, Kerala, Tamil Nadu
# and Telangana.
SOUTH_INDIAN_STATE_CODES = ['01', '08', '09', '18', '26']

# Iterate over a copy of the keys because the dict is mutated inside the loop.
for code in list(state_codes):
    if code not in SOUTH_INDIAN_STATE_CODES:
        del state_codes[code]
        # pop() with a default instead of del: a state that has no district
        # entry must not abort the filter with a KeyError.
        district_codes.pop(code, None)

In [11]:
# Display the state mapping that remains after filtering.
state_codes

{'01': 'ANDHRA PRADESH',
 '08': 'KARNATAKA',
 '09': 'KERALA',
 '18': 'TAMILNADU',
 '26': 'TELANGANA'}

In [12]:
# Display the district mapping that remains after filtering.
district_codes

{'01': {'0101': 'SRIKAKULAM',
  '0102': 'VIZIANAGARM',
  '0103': 'VISAKHAPATNAM',
  '0104': 'EAST GODAVARI',
  '0105': 'WEST GODAVARI',
  '0106': 'KRISHNA',
  '0107': 'GUNTUR',
  '0108': 'PRAKASAM',
  '0109': 'NELLORE',
  '0110': 'KURNOOL',
  '0111': 'ANANTPUR',
  '0112': 'Y S R ',
  '0113': 'CHITTOOR'},
 '08': {'0801': 'BANGALORE',
  '0802': 'BELGAUM',
  '0803': 'BELLARY',
  '0804': 'BIDAR',
  '0805': 'BIJAPUR',
  '0806': 'CHIKMAGALUR',
  '0807': 'CHITRADURGA',
  '0808': 'DAKSHINAKANNADA',
  '0809': 'DHARWAD',
  '0810': 'GULBARGA',
  '0811': 'HASSAN',
  '0812': 'KODAGU',
  '0813': 'KOLAR',
  '0814': 'MANDYA',
  '0815': 'MYSORE',
  '0816': 'RAICHUR',
  '0817': 'SHIMOGA',
  '0818': 'TUMKUR',
  '0819': 'UTTARAKANNADA',
  '0820': 'RAMANAGARA',
  '0821': 'BANGALORE RURAL',
  '0822': 'BAGALKOT',
  '0823': 'CHAMARAJANAGAR',
  '0824': 'DAVANAGERE',
  '0825': 'GADAG',
  '0826': 'HAVERI',
  '0827': 'KOPPAL',
  '0828': 'CHIKKABALLAPUR',
  '0829': 'UDUPI',
  '0830': 'YADAGIRI'},
 '09': {'0901': '

In [21]:
# Endpoint queried for every (state, district, month, year) combination.
BASE_URL = 'https://dackkms.gov.in/Account/API/kKMS_QueryData.aspx'

# All twelve months (range() excludes its stop value).
months = range(1, 13)
# Covers 2015 through 2018, i.e. four years.
# NOTE(review): the section heading says "last 5 years" — confirm the intended span.
years = range(2015, 2019)

# Output tree: <base_folder>/years/, created up front if it does not exist.
base_folder = 'data'
years_folder = os.path.join(base_folder, 'years')
os.makedirs(years_folder, exist_ok=True)


In [18]:
### for testing
# state_codes = {
#     "20": "UTTAR PRADESH"
# }
# district_codes = {

#     "20": {
#         "2001": "SAHARANPUR",
#         "2002": "MUZAFFARNAGAR",
#     }
# }


# months = range(1, 3)  # Test for January and February only
# years = range(2023, 2024)

In [22]:
# Download every month of every configured year for each selected district
# and write one CSV per state per year under years_folder/<year>/.

# Seconds to wait for the server; without a timeout a stalled connection
# would block the whole extraction indefinitely.
REQUEST_TIMEOUT = 30

for year in years:
    year_folder = os.path.join(years_folder, str(year))
    os.makedirs(year_folder, exist_ok=True)

    for state_code, state_name in state_codes.items():
        state_file = os.path.join(year_folder, f"{state_code}_{state_name}.csv")
        state_data = []

        for district_code, district_name in district_codes[state_code].items():
            for month in months:
                url = f"{BASE_URL}?StateCD={state_code}&DistrictCd={district_code}&Month={month}&Year={year}"

                try:
                    response = requests.get(url, timeout=REQUEST_TIMEOUT)
                    response.raise_for_status()
                    data = response.json()

                    # Only a dict payload with a non-empty 'data' list carries rows.
                    rows = data.get('data') if isinstance(data, dict) else None
                    if rows:
                        for row in rows:
                            # Tag each record with where and when it came from.
                            row['district_code'] = district_code
                            row['district_name'] = district_name
                            row['month'] = month
                            state_data.append(row)

                    print(f"Data fetched for {state_name}, {district_name}, {year}-{month:02d}")

                except (requests.RequestException, ValueError) as e:
                    # ValueError covers a non-JSON body on requests releases
                    # where that error is not a RequestException; one bad
                    # response must not discard the rows collected so far.
                    print(f"Error fetching data for {state_name}, {district_name}, {year}-{month:02d}: {e}")

                # Short pause between requests.
                sleep(0.2)

        if state_data:
            with open(state_file, 'w', newline='', encoding='utf-8') as csvfile:
                # Union of keys over all rows, in first-seen order: DictWriter
                # raises ValueError for a row holding a key that is not in
                # fieldnames, so the first row alone is not a safe header.
                fieldnames = list(dict.fromkeys(key for row in state_data for key in row))
                writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
                writer.writeheader()
                writer.writerows(state_data)
            print(f"Data saved to {state_file}")
        else:
            # The f prefix was missing, so the placeholders were printed literally.
            print(f"No data to save for {state_name} in {year}")

print("Data Extraction samapt hua")

Data fetched for ANDHRA PRADESH, SRIKAKULAM, 2015-01
Data fetched for ANDHRA PRADESH, SRIKAKULAM, 2015-02
Data fetched for ANDHRA PRADESH, SRIKAKULAM, 2015-03
Data fetched for ANDHRA PRADESH, SRIKAKULAM, 2015-04
Data fetched for ANDHRA PRADESH, SRIKAKULAM, 2015-05
Data fetched for ANDHRA PRADESH, SRIKAKULAM, 2015-06
Data fetched for ANDHRA PRADESH, SRIKAKULAM, 2015-07
Data fetched for ANDHRA PRADESH, SRIKAKULAM, 2015-08
Data fetched for ANDHRA PRADESH, SRIKAKULAM, 2015-09
Data fetched for ANDHRA PRADESH, SRIKAKULAM, 2015-10
Data fetched for ANDHRA PRADESH, SRIKAKULAM, 2015-11
Data fetched for ANDHRA PRADESH, SRIKAKULAM, 2015-12
Data fetched for ANDHRA PRADESH, VIZIANAGARM, 2015-01
Data fetched for ANDHRA PRADESH, VIZIANAGARM, 2015-02
Data fetched for ANDHRA PRADESH, VIZIANAGARM, 2015-03
Data fetched for ANDHRA PRADESH, VIZIANAGARM, 2015-04
Data fetched for ANDHRA PRADESH, VIZIANAGARM, 2015-05
Data fetched for ANDHRA PRADESH, VIZIANAGARM, 2015-06
Data fetched for ANDHRA PRADESH, VIZIANA