In [None]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

# Number of bytes requested per read() call while streaming a download.
CHUNK_SIZE = 40960
# Comma-separated entries of the form '<directory>:<percent-encoded download URL>';
# the download loop splits on ',' and ':' and unquotes the URL part.
# NOTE(review): the URL is a signed link carrying X-Goog-Date=20240402T141226Z and
# X-Goog-Expires=259200, so it has presumably expired — regenerate before re-running.
DATA_SOURCE_MAPPING = 'indian-penal-code-ipc-sections-information:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F4280663%2F7368197%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240402%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240402T141226Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3Db71aa4609fe7404d350a3ef552df655515d924d5eb9191f6440b0fd2738139c6fca383574c9c0c39626c33b58116ab7a282b12d13388243c6f02cf37dbbabace24bf73d726ef8ddc66466015b50776a26170546be734bd739ed80072f450def3279d1c1eb620fd75793a482b2be22dd5d7b74ff9b57ec008a7d9d35ad421938a4564fd4eef40a13d1fcf872c952b59eeeeff5c7ac282826e85f15177a10dacd332ee99204c07edd05dca4e913438c5c6418e123efc5901a0debdb2c126dd736c27bf8284441cbc222caa2d605088c2a2c88e21a38c3bc42f2a9f8d510c6f113ba80d2255d57b0de46b40f6f95f231089dc133dd8db388dfb80e63b76bfa69248'

# Directory into which each data source is extracted (one sub-directory per source).
KAGGLE_INPUT_PATH='/kaggle/input'
# Working directory created alongside the input directory.
KAGGLE_WORKING_PATH='/kaggle/working'
# NOTE(review): not referenced by any of the visible cells.
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

# Download every configured data source into a temporary file and unpack it
# into its own sub-directory of KAGGLE_INPUT_PATH. A source that fails to
# download is reported and skipped so the remaining sources are still tried.
for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    # Each entry is '<directory>:<percent-encoded URL>'. Split on the first
    # colon only, so a literal ':' in the URL part cannot break the unpacking.
    directory, download_url_encoded = data_source_mapping.split(':', 1)
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            # Content-Length is optional; when it is absent or malformed the
            # byte counter is still shown but the bar cannot be scaled.
            content_length = fileres.headers['content-length']
            try:
                total_length = int(content_length)
            except (TypeError, ValueError):
                total_length = 0
            print(f'Downloading {directory}, {content_length} bytes compressed')

            dl = 0
            # read() returns b'' at end of stream, which ends the iteration.
            for data in iter(lambda: fileres.read(CHUNK_SIZE), b''):
                dl += len(data)
                tfile.write(data)
                done = min(50, int(50 * dl / total_length)) if total_length > 0 else 0
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()

            # Push buffered bytes to disk: the tar branch reopens the file by
            # name and would otherwise see a truncated archive.
            tfile.flush()

            # NOTE(review): members are extracted without path validation;
            # only use archives from a trusted source.
            if filename.endswith('.zip'):
                with ZipFile(tfile) as zfile:
                    zfile.extractall(destination_path)
            else:
                # Bind the archive to its own name so the tarfile module is
                # not shadowed for later iterations.
                with tarfile.open(tfile.name) as tar_archive:
                    tar_archive.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError:
        # Must come before OSError: HTTPError is a subclass of it.
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
    except OSError:
        print(f'Failed to load {download_url} to path {destination_path}')

print('Data source import complete.')


In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the input tree lazily and print the full path of every file found.
input_files = (
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('/kaggle/input')
    for filename in filenames
)
for input_file in input_files:
    print(input_file)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Read the IPC sections CSV into a DataFrame
file_path = '/kaggle/input/indian-penal-code-ipc-sections-information/ipc_sections.csv'
df = pd.read_csv(filepath_or_buffer=file_path)

# Preview the first five rows as the cell output
df.head(n=5)

In [None]:
import numpy as np
import pandas as pd

# Read the IPC sections CSV into a DataFrame
file_path = '/kaggle/input/indian-penal-code-ipc-sections-information/ipc_sections.csv'
df = pd.read_csv(filepath_or_buffer=file_path)

# Features: the 'Offense' and 'Punishment' columns as a 2-D array.
# Target: the 'Description' column as a 1-D array.
feature_columns = ['Offense', 'Punishment']
X = df[feature_columns].values
y = df['Description'].values

# Show the first five entries of each array as a sanity check
print("Input Features (X):", X[:5], sep="\n")
print("\nTarget Variable (y):", y[:5], sep="\n")

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import LabelEncoder
# Report the dtypes of the two feature columns before any conversion
print("Data Types:")
print("Offense:", X[:, 0].dtype)
print("Punishment:", X[:, 1].dtype)

# Stringify both columns so that non-string cells (e.g. missing values)
# cannot break the concatenation below.
X[:, 0] = X[:, 0].astype(str)
X[:, 1] = X[:, 1].astype(str)

# Combine 'Offense' and 'Punishment' into a single text feature per row
X_text = X[:, 0] + ' ' + X[:, 1]

# Encode the target strings as integer class ids
label_encoder = LabelEncoder()
y_numerical = label_encoder.fit_transform(y)

# Split the raw text BEFORE fitting the vectorizer, so the vocabulary is
# learned from training rows only and nothing leaks from the held-out rows.
X_text_train, X_text_test, y_train, y_test = train_test_split(
    X_text, y_numerical, test_size=0.2, random_state=42
)

# Bag-of-words counts: fit on the training text, reuse the vocabulary for test
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(X_text_train)
X_test = vectorizer.transform(X_text_test)

# Whole data set encoded with the training vocabulary (used for reporting)
X_numerical = vectorizer.transform(X_text)

# Print the shape of the numerical features and target variable
print("Shape of X_numerical:", X_numerical.shape)
print("Shape of y_numerical:", y_numerical.shape)

In [None]:
from keras.models import Sequential
from keras.layers import Dense

# Feed-forward classifier: two ReLU hidden layers followed by a softmax with
# one unit per distinct encoded label.
n_features = X_train.shape[1]
n_classes = len(np.unique(y_numerical))
model = Sequential([
    Dense(64, input_dim=n_features, activation='relu'),
    Dense(32, activation='relu'),
    Dense(n_classes, activation='softmax'),
])

# Labels are integer class ids, hence the sparse categorical cross-entropy loss
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Show the layer-by-layer summary
model.summary()

In [None]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_sample_weight
from sklearn.preprocessing import LabelEncoder  # Add this import
from keras.models import Sequential
from keras.layers import Dense, Dropout

# Read the IPC sections CSV into a DataFrame
file_path = '/kaggle/input/indian-penal-code-ipc-sections-information/ipc_sections.csv'
df = pd.read_csv(filepath_or_buffer=file_path)

# Features: 'Offense' and 'Punishment', forced to strings before conversion
# to an array
feature_frame = df[['Offense', 'Punishment']].astype(str)
X = feature_frame.values

# Target: 'Description', mapped to integer class ids
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(df['Description'].values)

# One output unit per distinct class id
output_neurons = len(np.unique(y))

# Hold out 20% of the rows for the final evaluation
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Join the two text columns of each split into one string per row
X_text_train, X_text_test = (
    part[:, 0] + ' ' + part[:, 1] for part in (X_train, X_test)
)

# Bag-of-words counts: vocabulary learned from the training text only
vectorizer = CountVectorizer()
X_train_numerical = vectorizer.fit_transform(X_text_train)
X_test_numerical = vectorizer.transform(X_text_test)

# Per-sample weights that balance the class frequencies of the training labels
sample_weights = compute_sample_weight('balanced', y_train)

# Network: two ReLU hidden layers, each followed by 50% dropout, then a
# softmax over the classes
model = Sequential([
    Dense(128, input_dim=X_train_numerical.shape[1], activation='relu'),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(output_neurons, activation='softmax'),
])

# Integer labels -> sparse categorical cross-entropy (the sample weights are
# supplied to fit(), not to compile())
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit on dense arrays, validating on 20% of the training data and applying
# the sample weights
history_weighted = model.fit(
    X_train_numerical.toarray(),
    y_train,
    epochs=1000,
    batch_size=100,
    validation_split=0.2,
    sample_weight=sample_weights,
)

# Score the held-out rows
test_loss, test_accuracy = model.evaluate(X_test_numerical.toarray(), y_test)
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")

In [None]:
# Free-text query to classify
input_text = "robbery"

# Combine with additional information if needed
# For example, if you have additional context in another variable 'context_text'
# you can concatenate them: input_text = input_text + ' ' + context_text

# Encode the query with the already fitted vectorizer (one row per input text)
input_numerical = vectorizer.transform([input_text])

# An all-zero row means none of the query's tokens are in the fitted
# vocabulary, so the model output would not depend on the text at all.
if input_numerical.nnz == 0:
    print(f"Warning: no token of {input_text!r} is in the vectorizer vocabulary; "
          "the prediction below does not reflect the input text.")

# Class probabilities from the softmax output; expected shape (1, n_classes)
prediction = model.predict(input_numerical.toarray())

# Index of the most probable class for the single input row. The argmax is
# taken along the class axis rather than over the flattened array, so it stays
# correct if more rows are ever passed in.
predicted_label = np.argmax(prediction, axis=-1)[0]

# Map the integer class id back to the original label text
predicted_label_original = label_encoder.inverse_transform([predicted_label])

# Display or use the prediction as needed
print(predicted_label_original)

In [None]:
# Persist the trained model as a .keras file in the working directory
model_output_path = '/kaggle/working/text_prediction_model.keras'
model.save(model_output_path)
