In [1]:
import os
import requests
import csv
from time import sleep

In [None]:
import requests
import time
import json

# Endpoint that test_api() sends its GET requests to.
base_url = "https://dackkms.gov.in/Account/API/kKMS_QueryData.aspx"

def test_api(state, district, month, year, timeout=30):
    """Query the endpoint for one state/district/month/year combination.

    Args:
        state: Integer state number; zero-padded to two digits for ``StateCD``.
        district: Integer district number; zero-padded to two digits and
            appended to the state code to form ``DistrictCd``.
        month: Value sent as the ``Month`` query parameter.
        year: Value sent as the ``Year`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a single stalled connection would block the crawl forever.

    Returns:
        ``(status_code, parsed_json)`` when the request completes and the body
        parses as JSON; ``(0, error_message)`` when the request fails or the
        body is not valid JSON.
    """
    state_code = f"{state:02d}"
    district_code = f"{state_code}{district:02d}"
    params = {
        "StateCD": state_code,
        "DistrictCd": district_code,
        "Month": month,
        "Year": year
    }
    try:
        response = requests.get(base_url, params=params, timeout=timeout)
        return response.status_code, response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body (e.g. an HTML error page), which
        # is not a RequestException on every requests version.
        return 0, str(e)

def extract_info(content):
    """Pull the state and district names out of a parsed API response.

    Args:
        content: Parsed JSON response from the endpoint (expected to be a dict).

    Returns:
        ``(state_name, district_name)`` read from the first entry of
        ``content['data']`` when ``ResponseCode`` is ``'1'`` and ``Response``
        is ``'Data Found'``. ``(None, None)`` in every other case, including
        responses that lack the expected keys or have an unexpected shape, so
        one odd response cannot abort a long-running crawl.
    """
    if not isinstance(content, dict):
        return None, None
    if content.get('ResponseCode') != '1' or content.get('Response') != 'Data Found':
        return None, None

    rows = content.get('data')
    if not isinstance(rows, (list, tuple)) or not rows:
        return None, None

    first_row = rows[0]
    if not isinstance(first_row, dict):
        return None, None
    return first_row.get('StateName'), first_row.get('DistrictName')

def save_intermediate_results(states, districts, last_processed_state):
    """Write a checkpoint of both result dicts to the current directory.

    Args:
        states: Dict dumped to ``states_intermediate_<N>.json``.
        districts: Dict dumped to ``districts_intermediate_<N>.json``.
        last_processed_state: Value used as ``<N>`` in both filenames and in
            the confirmation message.
    """
    checkpoints = (
        (f'states_intermediate_{last_processed_state}.json', states),
        (f'districts_intermediate_{last_processed_state}.json', districts),
    )
    for path, payload in checkpoints:
        with open(path, 'w') as handle:
            json.dump(payload, handle, indent=2)
    print(f"\nSaved intermediate results up to state {last_processed_state}")

def main():
    """Probe every state/district code pair and record the names returned.

    Walks state numbers 1-99 and, within each, district numbers 1-99, calling
    ``test_api`` for January 2023 on every pair. Names returned by
    ``extract_info`` are collected into two dicts: state code -> state name,
    and state code -> {district code -> district name}. Every state code gets
    an entry in the districts dict, even when nothing is found for it.

    Side effects: sleeps 0.5 s after each request and 15 s after each state,
    writes checkpoint files after every third state, writes
    ``states_final.json`` and ``districts_final.json`` at the end, and prints
    progress throughout.
    """
    states, districts = {}, {}
    total_requests = successful_extractions = 0

    print("Starting the discovery process...")

    for state in range(1, 100):  # state codes 01 through 99
        state_code = f"{state:02d}"
        found_districts = {}
        districts[state_code] = found_districts
        print(f"\nChecking state code: {state_code}")

        for district in range(1, 100):  # at most 99 districts assumed per state
            total_requests += 1
            status, content = test_api(state, district, 1, 2023)  # January 2023

            if status != 200 or not isinstance(content, dict):
                print(f"Failed or empty response for state code {state_code}, district number {district:02d}")
            else:
                state_name, district_name = extract_info(content)

                if not (state_name and district_name):
                    print(f"No data for state code {state_code}, district number {district:02d}")
                else:
                    successful_extractions += 1
                    if state_code not in states:
                        states[state_code] = state_name
                        print(f"Discovered new state: {state_name} ({state_code})")

                    district_code = f"{state_code}{district:02d}"
                    found_districts[district_code] = district_name

                    print(f"Found: State {state_name} ({state_code}), District {district_name} ({district_code})")

            # Short pause between requests to avoid overwhelming the server.
            time.sleep(0.5)

            # Report progress on every tenth request.
            if not total_requests % 10:
                print(f"Progress: {total_requests} requests made, {successful_extractions} successful extractions")

        # Checkpoint after every third state.
        if not state % 3:
            save_intermediate_results(states, districts, state)

        # Longer pause between states.
        print(f"Finished processing state code {state_code}. Taking a 15-second break...")
        time.sleep(15)

    # Write the final results, states first, then districts.
    final_outputs = (
        ('states_final.json', states, "\nSaved final states data to states_final.json"),
        ('districts_final.json', districts, "Saved final districts data to districts_final.json"),
    )
    for path, payload, message in final_outputs:
        with open(path, 'w') as out:
            json.dump(payload, out, indent=2)
        print(message)

    district_total = sum(len(found) for found in districts.values())
    print(f"\nDiscovery process completed.")
    print(f"Total requests made: {total_requests}")
    print(f"Successful extractions: {successful_extractions}")
    print(f"States discovered: {len(states)}")
    print(f"Districts discovered: {district_total}")

# Run the full discovery crawl when this code is executed as the main module.
if __name__ == "__main__":
    main()

In [None]:
import requests
import time
import json
import os

# Endpoint that test_api() sends its GET requests to.
base_url = "https://dackkms.gov.in/Account/API/kKMS_QueryData.aspx"

def test_api(state, district, month, year, timeout=30):
    """Query the endpoint for one state/district/month/year combination.

    Args:
        state: Integer state number; zero-padded to two digits for ``StateCD``.
        district: Integer district number; zero-padded to two digits and
            appended to the state code to form ``DistrictCd``.
        month: Value sent as the ``Month`` query parameter.
        year: Value sent as the ``Year`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a single stalled connection would block the crawl forever.

    Returns:
        ``(status_code, parsed_json)`` when the request completes and the body
        parses as JSON; ``(0, error_message)`` when the request fails or the
        body is not valid JSON.
    """
    state_code = f"{state:02d}"
    district_code = f"{state_code}{district:02d}"
    params = {
        "StateCD": state_code,
        "DistrictCd": district_code,
        "Month": month,
        "Year": year
    }
    try:
        response = requests.get(base_url, params=params, timeout=timeout)
        return response.status_code, response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body (e.g. an HTML error page), which
        # is not a RequestException on every requests version.
        return 0, str(e)

def extract_info(content):
    """Pull the state and district names out of a parsed API response.

    Args:
        content: Parsed JSON response from the endpoint (expected to be a dict).

    Returns:
        ``(state_name, district_name)`` read from the first entry of
        ``content['data']`` when ``ResponseCode`` is ``'1'`` and ``Response``
        is ``'Data Found'``. ``(None, None)`` in every other case, including
        responses that lack the expected keys or have an unexpected shape, so
        one odd response cannot abort a long-running crawl.
    """
    if not isinstance(content, dict):
        return None, None
    if content.get('ResponseCode') != '1' or content.get('Response') != 'Data Found':
        return None, None

    rows = content.get('data')
    if not isinstance(rows, (list, tuple)) or not rows:
        return None, None

    first_row = rows[0]
    if not isinstance(first_row, dict):
        return None, None
    return first_row.get('StateName'), first_row.get('DistrictName')

def save_intermediate_results(states, districts, last_processed_state):
    """Write a checkpoint of both result dicts to the current directory.

    Args:
        states: Dict dumped to ``states_intermediate_<N>.json``.
        districts: Dict dumped to ``districts_intermediate_<N>.json``.
        last_processed_state: Value used as ``<N>`` in both filenames and in
            the confirmation message.
    """
    checkpoints = (
        (f'states_intermediate_{last_processed_state}.json', states),
        (f'districts_intermediate_{last_processed_state}.json', districts),
    )
    for path, payload in checkpoints:
        with open(path, 'w') as handle:
            json.dump(payload, handle, indent=2)
    print(f"\nSaved intermediate results up to state {last_processed_state}")

def load_intermediate_results(last_processed_state):
    """Read back the checkpoint pair saved for ``last_processed_state``.

    Args:
        last_processed_state: Number embedded in the checkpoint filenames
            ``states_intermediate_<N>.json`` and
            ``districts_intermediate_<N>.json`` in the current directory.

    Returns:
        ``(states, districts)`` as parsed from the two JSON files.
    """
    checkpoint_paths = (
        f'states_intermediate_{last_processed_state}.json',
        f'districts_intermediate_{last_processed_state}.json',
    )
    loaded = []
    for path in checkpoint_paths:
        with open(path, 'r') as handle:
            loaded.append(json.load(handle))
    states, districts = loaded
    return states, districts

def find_last_processed_state():
    """Return the highest state number that has a saved states checkpoint.

    Scans the current working directory for files named
    ``states_intermediate_<N>.json`` and compares the ``<N>`` parts as
    integers. Comparing the filenames as strings would rank
    ``states_intermediate_9.json`` above ``states_intermediate_12.json`` and
    pick the wrong resume point.

    Returns:
        The largest checkpoint number found, or 0 when no checkpoint exists.
    """
    prefix, suffix = 'states_intermediate_', '.json'
    checkpoint_numbers = []
    for name in os.listdir():
        if not (name.startswith(prefix) and name.endswith(suffix)):
            continue
        number = name[len(prefix):-len(suffix)]
        # Ignore look-alike files whose middle part is not a plain integer.
        if number.isdecimal():
            checkpoint_numbers.append(int(number))
    return max(checkpoint_numbers, default=0)

def main(start_state=None):
    """Run the state/district discovery crawl, optionally resuming a prior run.

    Args:
        start_state: First state number to probe. When ``None``, the crawl
            resumes after the checkpoint reported by
            ``find_last_processed_state`` (loading its saved results), or
            starts at state 1 with empty results if there is no checkpoint.
            When given explicitly, the crawl starts at that state with empty
            result dicts, so the checkpoints and final files written by that
            run only contain states from ``start_state`` onward.

    For each state up to 99 and each district number 1-99, ``test_api`` is
    called for January 2023 and names returned by ``extract_info`` are
    recorded. Side effects: sleeps 0.5 s after each request and 15 s after
    each state, writes checkpoint files after every state number divisible
    by three, writes ``states_final.json`` and ``districts_final.json`` at the
    end, and prints progress throughout.
    """
    if start_state is not None:
        print(f"Starting from specified state {start_state}")
        states, districts = {}, {}
    else:
        last_processed_state = find_last_processed_state()
        start_state = last_processed_state + 1
        if last_processed_state > 0:
            print(f"Resuming from state {start_state}")
            states, districts = load_intermediate_results(last_processed_state)
        else:
            print("Starting new discovery process")
            states, districts = {}, {}

    total_requests = successful_extractions = 0

    for state in range(start_state, 100):  # state codes start_state through 99
        state_code = f"{state:02d}"
        # Reuse an entry restored from a checkpoint, otherwise create one.
        found_districts = districts.setdefault(state_code, {})
        print(f"\nChecking state code: {state_code}")

        for district in range(1, 100):  # at most 99 districts assumed per state
            total_requests += 1
            status, content = test_api(state, district, 1, 2023)  # January 2023

            if status != 200 or not isinstance(content, dict):
                print(f"Failed or empty response for state code {state_code}, district number {district:02d}")
            else:
                state_name, district_name = extract_info(content)

                if not (state_name and district_name):
                    print(f"No data for state code {state_code}, district number {district:02d}")
                else:
                    successful_extractions += 1
                    if state_code not in states:
                        states[state_code] = state_name
                        print(f"Discovered new state: {state_name} ({state_code})")

                    district_code = f"{state_code}{district:02d}"
                    found_districts[district_code] = district_name

                    print(f"Found: State {state_name} ({state_code}), District {district_name} ({district_code})")

            # Short pause between requests to avoid overwhelming the server.
            time.sleep(0.5)

            # Report progress on every tenth request.
            if not total_requests % 10:
                print(f"Progress: {total_requests} requests made, {successful_extractions} successful extractions")

        # Checkpoint whenever the state number is divisible by three.
        if not state % 3:
            save_intermediate_results(states, districts, state)

        # Longer pause between states.
        print(f"Finished processing state code {state_code}. Taking a 15-second break...")
        time.sleep(15)

    # Write the final results, states first, then districts.
    final_outputs = (
        ('states_final.json', states, "\nSaved final states data to states_final.json"),
        ('districts_final.json', districts, "Saved final districts data to districts_final.json"),
    )
    for path, payload, message in final_outputs:
        with open(path, 'w') as out:
            json.dump(payload, out, indent=2)
        print(message)

    district_total = sum(len(found) for found in districts.values())
    print(f"\nDiscovery process completed.")
    print(f"Total requests made: {total_requests}")
    print(f"Successful extractions: {successful_extractions}")
    print(f"States discovered: {len(states)}")
    print(f"Districts discovered: {district_total}")

# Run the crawl starting at state 16. Because start_state is passed
# explicitly, main() begins with empty result dicts instead of loading a
# saved checkpoint.
if __name__ == "__main__":
    main(start_state=16)

# Extracting data for the last 5 years

In [1]:
import os
import requests
import csv
from time import sleep
import json

In [2]:
# Load the state-code -> state-name mapping from states_final.json.
with open('states_final.json') as states_file:
    state_codes = json.load(states_file)

In [3]:
# Two-digit codes '01' through '08': the keys to be dropped from the
# state and district lookups further down.
to_remove_from_dict = [f"0{i}" for i in range(1, 9)]
to_remove_from_dict

['01', '02', '03', '04', '05', '06', '07', '08']

In [19]:
# Remove state code '01' and show its name. The None default keeps this cell
# re-runnable: a second execution is a no-op instead of raising KeyError.
state_codes.pop('01', None)

'ANDHRA PRADESH'

In [4]:
# Load the per-state district lookup (state code -> {district code -> name})
# from districts_final.json.
with open('districts_final.json') as districts_file:
    district_codes = json.load(districts_file)

In [5]:
# Drop the unwanted codes from both lookups. pop(..., None) is used instead of
# `del` so a key that is already gone does not raise KeyError -- for example
# '01', which an earlier cell pops from state_codes, or any key when this
# cell is executed a second time.
for code in to_remove_from_dict:
    state_codes.pop(code, None)
    district_codes.pop(code, None)

In [6]:
state_codes

{'09': 'KERALA',
 '10': 'MADHYA PRADESH',
 '11': 'MAHARASHTRA',
 '12': 'MANIPUR',
 '13': 'MEGHALAYA',
 '14': 'NAGALAND',
 '15': 'ODISHA',
 '16': 'PUNJAB',
 '17': 'RAJASTHAN',
 '18': 'TAMILNADU',
 '19': 'TRIPURA',
 '20': 'UTTAR PRADESH',
 '21': 'WEST BENGAL',
 '22': 'SIKKIM',
 '23': 'CHHATTISGARH',
 '24': 'JHARKAND',
 '25': 'UTTARAKHAND',
 '26': 'TELANGANA',
 '31': 'A AND N ISLANDS',
 '32': 'ARUNACHAL PRADESH',
 '33': 'CHANDIGARH',
 '34': 'DADRA AND NAGAR HAVELI',
 '35': 'DELHI',
 '36': 'DAMAN AND DIU'}

In [7]:
district_codes

{'09': {'0901': 'TRIVANDRUM',
  '0903': 'ALAPPUZHA',
  '0904': 'KOTTAYAM',
  '0905': 'IDUKKI',
  '0906': 'ERNAKULAM',
  '0907': 'THRISSUR',
  '0908': 'PALAKKAD',
  '0909': 'MALAPPURAM',
  '0910': 'KOZHIKODE',
  '0911': 'WAYANAD',
  '0912': 'KANNUR',
  '0913': 'PATHANAMTHITTA',
  '0914': 'KASARGOD',
  '0915': 'KOLLAM'},
 '10': {'1001': 'SAGAR',
  '1002': 'DAMOH',
  '1003': 'JABALPUR',
  '1004': 'MANDLA',
  '1005': 'HOSHANGABAD',
  '1006': 'NARSIMPUR',
  '1007': 'EAST NIMAR',
  '1008': 'BALAGHAT',
  '1009': 'BETUL',
  '1010': 'CHHINDWARA',
  '1011': 'SEONI',
  '1012': 'AGAR',
  '1019': 'BHIND',
  '1020': 'GWALIOR',
  '1021': 'SHIVPURI',
  '1022': 'GUNA',
  '1023': 'VIDISHA',
  '1024': 'RAJGARH',
  '1025': 'MORENA',
  '1026': 'SHAJAPUR',
  '1027': 'UJJAIN',
  '1028': 'RATLAM',
  '1029': 'MANDSAUR',
  '1030': 'DEWAS',
  '1031': 'INDORE',
  '1032': 'WEST NIMAR',
  '1033': 'DHAR',
  '1034': 'JHABUA',
  '1035': 'REWA',
  '1036': 'SATNA',
  '1037': 'SIDHI',
  '1038': 'SHAHDOL',
  '1039': 'DATI

In [8]:
# Endpoint queried by the extraction loop.
BASE_URL = 'https://dackkms.gov.in/Account/API/kKMS_QueryData.aspx'

# Full-run settings: every month of the configured years.
# NOTE(review): this range covers 2019 only, while the section heading
# mentions the last five years -- confirm the intended range.
months = range(1, 13)
years = range(2019, 2020)

# Output layout: data/years/<year>/<state>.csv
base_folder = 'data'
years_folder = os.path.join(base_folder, 'years')
os.makedirs(years_folder, exist_ok=True)

# While TEST_MODE is True the settings above and the state_codes /
# district_codes lookups are replaced by a two-district, two-month sample so
# the extraction loop can be tried quickly. Set it to False for a full run
# with the lookups loaded from the JSON files.
TEST_MODE = True

if TEST_MODE:
    state_codes = {
        "20": "UTTAR PRADESH"
    }
    district_codes = {
        "20": {
            "2001": "SAHARANPUR",
            "2002": "MUZAFFARNAGAR",
        }
    }

    months = range(1, 3)  # January and February only
    years = range(2023, 2024)

In [None]:
# Download every (year, state, district, month) combination and write one CSV
# per state per year into <years_folder>/<year>/.
REQUEST_TIMEOUT = 30  # seconds; requests waits indefinitely when no timeout is set

for year in years:
    year_folder = os.path.join(years_folder, str(year))
    os.makedirs(year_folder, exist_ok=True)

    for state_code, state_name in state_codes.items():
        state_file = os.path.join(year_folder, f"{state_code}_{state_name}.csv")
        state_data = []

        # A state without a district entry is treated as having no districts.
        for district_code, district_name in district_codes.get(state_code, {}).items():
            for month in months:
                url = f"{BASE_URL}?StateCD={state_code}&DistrictCd={district_code}&Month={month}&Year={year}"

                try:
                    response = requests.get(url, timeout=REQUEST_TIMEOUT)
                    response.raise_for_status()
                    data = response.json()

                    rows = data.get('data') if isinstance(data, dict) else None
                    if rows:
                        for row in rows:
                            # Tag each record with where and when it came from.
                            row['district_code'] = district_code
                            row['district_name'] = district_name
                            row['month'] = month
                            state_data.append(row)

                    print(f"Data fetched for {state_name}, {district_name}, {year}-{month:02d}")

                except (requests.RequestException, ValueError) as e:
                    # ValueError covers a response body that is not valid JSON.
                    print(f"Error fetching data for {state_name}, {district_name}, {year}-{month:02d}: {e}")

                # Short pause between requests to go easy on the server.
                sleep(0.5)

        if state_data:
            # Use the union of keys across all rows (first-seen order) so a
            # row with an extra field does not make DictWriter raise.
            fieldnames = list(dict.fromkeys(key for row in state_data for key in row))
            with open(state_file, 'w', newline='', encoding='utf-8') as csvfile:
                writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
                writer.writeheader()
                writer.writerows(state_data)
            print(f"Data saved to {state_file}")
        else:
            print(f"No data to save for {state_name} in {year}")

print("Data Extraction samapt hua")