# **Decentralized Digital Identity using Blockchain**

In [None]:
from google.colab import drive

# Attach Google Drive to the Colab filesystem; later cells read files from
# underneath this mount point.
drive.mount(mountpoint='/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!pip install Faker



## **High-Volume Data Generation Script**

In [None]:
import csv
from faker import Faker
import uuid

fake = Faker()
record_count = 20000

# File name for your research data
filename = "student_identity_records.csv"

print(f"Generating {record_count} records... Please wait.")

# The first 8 hex characters of a UUID4 carry only 32 random bits, so at this
# volume a repeated Student_ID is a realistic possibility. Remember every ID
# already written and redraw on collision so the column is unique in the file.
issued_student_ids = set()

with open(filename, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)

    # Headers for your Research Dataset
    writer.writerow(["Student_ID", "DID_Address", "Full_Name", "Degree", "Graduation_Year", "Email"])

    for _ in range(record_count):
        # Short ID: text before the first hyphen of a random UUID.
        student_id = str(uuid.uuid4())[:8]
        while student_id in issued_student_ids:
            student_id = str(uuid.uuid4())[:8]
        issued_student_ids.add(student_id)

        # Mock blockchain address: '0x' followed by 40 random hex digits.
        wallet_address = fake.hexify(text='0x' + '^' * 40)

        writer.writerow([
            student_id,
            f"did:ethr:{wallet_address}",
            fake.name(),
            "B.Sc. Information Technology",  # same degree for every row
            "2026",                          # same graduation year for every row
            fake.ascii_free_email()
        ])

print(f"Successfully generated {record_count} records in {filename}!")

Generating 20000 records... Please wait.
Successfully generated 20000 records in student_identity_records.csv!


In [None]:
import json

# Read the exported dataset back from the mounted Drive folder.
with open('/content/drive/MyDrive/student_data.json') as fh:
    loaded_data = json.loads(fh.read())

# Pull out the two collections stored under the top-level keys.
students = loaded_data["students"]
hashes = loaded_data["hashes"]

# Quick sanity check: sizes first, then a peek at the first entry of each
# (only the leading 20 characters of the hash are shown).
print(f"✅ Loaded {len(students)} students + {len(hashes)} hashes")
print(f"First student: {students[0]}")
print(f"First hash:    {hashes[0][:20]}...")

✅ Loaded 20000 students + 20000 hashes
First student: 0xef913c0bb17d0f37d7fdd895b84283824dd444da
First hash:    Qm19076634b8f9dd4649...


# **Solidity**

In [None]:
!pip install web3 faker

Collecting web3
  Downloading web3-7.14.0-py3-none-any.whl.metadata (5.6 kB)
Collecting eth-abi>=5.0.1 (from web3)
  Downloading eth_abi-5.2.0-py3-none-any.whl.metadata (3.8 kB)
Collecting eth-account>=0.13.6 (from web3)
  Downloading eth_account-0.13.7-py3-none-any.whl.metadata (3.7 kB)
Collecting eth-hash>=0.5.1 (from eth-hash[pycryptodome]>=0.5.1->web3)
  Downloading eth_hash-0.7.1-py3-none-any.whl.metadata (4.2 kB)
Collecting eth-typing>=5.0.0 (from web3)
  Downloading eth_typing-5.2.1-py3-none-any.whl.metadata (3.2 kB)
Collecting eth-utils>=5.0.0 (from web3)
  Downloading eth_utils-5.3.1-py3-none-any.whl.metadata (5.7 kB)
Collecting hexbytes>=1.2.0 (from web3)
  Downloading hexbytes-1.3.1-py3-none-any.whl.metadata (3.3 kB)
Collecting types-requests>=2.0.0 (from web3)
  Downloading types_requests-2.32.4.20260107-py3-none-any.whl.metadata (2.0 kB)
Collecting pyunormalize>=15.0.0 (from web3)
  Downloading pyunormalize-17.0.0-py3-none-any.whl.metadata (5.7 kB)
Collecting parsimonious<

In [None]:
%%writefile BatchIdentityRegistry.sol
// SPDX-License-Identifier: MIT
pragma solidity ^0.8.0;

/// @title BatchIdentityRegistry
/// @notice Stores, per address, an IPFS hash string together with the block
///         timestamp at which that entry was last written.
/// @dev NOTE(review): batchRegister has no access restriction, so any caller
///      can create or overwrite any entry — confirm this is intended.
contract BatchIdentityRegistry {
    /// @dev One registry entry: the hash string supplied by the caller and
    ///      the timestamp of the block that wrote it.
    struct Identity {
        string ipfsHash;
        uint256 timestamp;
    }

    /// @notice Lookup from address to its current entry (auto-generated getter).
    mapping(address => Identity) public registry;

    /// @notice Write one entry per (address, hash) pair in a single transaction.
    /// @dev Reverts when the two arrays differ in length so that a malformed
    ///      batch fails with a clear reason instead of an out-of-bounds panic
    ///      (too few hashes) or silently dropped data (too many hashes).
    ///      Parameters are `calldata` so the arrays are read in place rather
    ///      than copied into memory; the external ABI is unchanged.
    /// @param _students Addresses to register; an existing entry is overwritten.
    /// @param _hashes   Hash strings, positionally matched to `_students`.
    function batchRegister(address[] calldata _students, string[] calldata _hashes) public {
        uint256 count = _students.length;
        require(count == _hashes.length, "BatchIdentityRegistry: array length mismatch");

        for (uint256 i = 0; i < count; i++) {
            registry[_students[i]] = Identity(_hashes[i], block.timestamp);
        }
    }
}

Writing BatchIdentityRegistry.sol


# **Bridge between Python and Solidity**

Here is the Python logic to process your 20,000 records in batches of 100:

In [None]:
from faker import Faker
import json

fake = Faker()
total_records, batch_size = 20000, 100

# 1. Generate the 20,000 records
# Each pass draws the mock wallet first and the mock IPFS hash second, so the
# two values of a pair are produced together before moving to the next record.
generated_pairs = [
    (
        fake.hexify(text='0x' + '^' * 40),   # Mock Wallet
        f"Qm{fake.sha256()[:44]}",           # Mock IPFS Hash
    )
    for _ in range(total_records)
]

# Split the pairs back into the two position-aligned lists used downstream.
students = [wallet for wallet, _ in generated_pairs]
hashes = [ipfs_hash for _, ipfs_hash in generated_pairs]

# 2. Function to process in batches (Explanation for your paper)
def process_batches(student_list, hash_list, size):
    """Walk two position-aligned lists in fixed-size batches and print progress.

    Each batch is the slice ``[i:i + size]`` of both lists. The contract call
    itself is left as a commented-out placeholder; this function only reports
    what would be uploaded.

    Args:
        student_list: Sequence of student addresses.
        hash_list: Sequence of hash strings, one per entry in ``student_list``.
        size: Maximum number of records per batch; must be a positive integer.

    Raises:
        ValueError: If ``size`` is not positive, or if the two lists differ in
            length (the pairs would no longer line up).
    """
    if size <= 0:
        raise ValueError(f"size must be a positive integer, got {size!r}")
    if len(student_list) != len(hash_list):
        raise ValueError(
            f"student_list and hash_list must have the same length "
            f"({len(student_list)} != {len(hash_list)})"
        )

    for i in range(0, len(student_list), size):
        batch_students = student_list[i:i + size]
        batch_hashes = hash_list[i:i + size]

        # In a real scenario, you would call the smart contract here:
        # contract.functions.batchRegister(batch_students, batch_hashes).transact()

        # Count what was actually in this slice: the last batch may be shorter
        # than `size`, so `i + size` would overstate the running total.
        print(f"Uploaded batch starting at index {i}... (Total: {i + len(batch_students)})")

# Run the batching walk over the full generated dataset.
process_batches(student_list=students, hash_list=hashes, size=batch_size)

Uploaded batch starting at index 0... (Total: 100)
Uploaded batch starting at index 100... (Total: 200)
Uploaded batch starting at index 200... (Total: 300)
Uploaded batch starting at index 300... (Total: 400)
Uploaded batch starting at index 400... (Total: 500)
Uploaded batch starting at index 500... (Total: 600)
Uploaded batch starting at index 600... (Total: 700)
Uploaded batch starting at index 700... (Total: 800)
Uploaded batch starting at index 800... (Total: 900)
Uploaded batch starting at index 900... (Total: 1000)
Uploaded batch starting at index 1000... (Total: 1100)
Uploaded batch starting at index 1100... (Total: 1200)
Uploaded batch starting at index 1200... (Total: 1300)
Uploaded batch starting at index 1300... (Total: 1400)
Uploaded batch starting at index 1400... (Total: 1500)
Uploaded batch starting at index 1500... (Total: 1600)
Uploaded batch starting at index 1600... (Total: 1700)
Uploaded batch starting at index 1700... (Total: 1800)
Uploaded batch starting at inde

# **Connecting Python to the provider**

# **Smart Contract**

In [None]:
# 1. Address of the deployed contract, copied from the 'Deployed Contracts'
#    section in Remix. This must be the contract's actual hexadecimal address
#    (0x...), NOT an API key or project ID.
# NOTE(review): confirm this address belongs to a deployment on the same
#    network that `w3` is connected to — TODO verify.
CONTRACT_ADDRESS = "0xd9145CCE52D386f254917e481eB44e9943F39138"
# 2. ABI copied from the 'Solidity Compiler' tab in Remix, parsed from JSON.
#    It declares two entries: batchRegister(address[], string[]) (nonpayable)
#    and the registry(address) view returning (ipfsHash, timestamp).
import json
CONTRACT_ABI = json.loads('''[
	{
		"inputs": [
			{
				"internalType": "address[]",
				"name": "_students",
				"type": "address[]"
			},
			{
				"internalType": "string[]",
				"name": "_hashes",
				"type": "string[]"
			}
		],
		"name": "batchRegister",
		"outputs": [],
		"stateMutability": "nonpayable",
		"type": "function"
	},
	{
		"inputs": [
			{
				"internalType": "address",
				"name": "",
				"type": "address"
			}
		],
		"name": "registry",
		"outputs": [
			{
				"internalType": "string",
				"name": "ipfsHash",
				"type": "string"
			},
			{
				"internalType": "uint256",
				"name": "timestamp",
				"type": "uint256"
			}
		],
		"stateMutability": "view",
		"type": "function"
	}
]
''')

# 3. Bind the address and ABI into a contract object.
# NOTE(review): `w3` is not created in this cell or in any cell shown above it;
#    a connected Web3 instance must already exist in the kernel, otherwise this
#    line raises NameError — confirm where `w3` is defined.
contract = w3.eth.contract(address=CONTRACT_ADDRESS, abi=CONTRACT_ABI)