In [None]:
%pip install tensorflow
%pip install scikit-learn
%pip install pandas
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

# Load the data and preprocess
df = pd.read_csv('career_recommendations.csv')

# Define input and target columns
input_columns = ['Stream', 'Interest1', 'Interest2']
target_columns = ['Suggested_Field', 'MainJobProfiles',
                  'CareerPath', 'OtherJobProfiles']

# Encode categorical variables
label_encoders = {}
for column in input_columns + target_columns:
    le = LabelEncoder()
    le.fit(df[column].astype(str))
    df[column] = le.transform(df[column].astype(str))
    label_encoders[column] = le

# Separate features and target
X = df[input_columns]
y = df[target_columns]

# Split the data
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# Get input and output shapes
input_shape = X_train.shape[1]
output_shapes = [len(label_encoders[col].classes_) for col in target_columns]

# Define the model
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(input_shape,)),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(108, activation='relu'),
    tf.keras.layers.Dense(108, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(sum(output_shapes), activation='softmax')
])

# Compile the m108del
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Prepare the target outputs for model fitting
y_train_targets = [y_train[col] for col in target_columns]

# Fit the model
model.fit(X_train, y_train_targets, epochs=100, validation_split=0.2)

# Define 10 sample inputs using valid labels
sample_inputs = [
    {'Stream': 'Science', 'Interest1': 'Engineering', 'Interest2': 'Finance'},
    {'Stream': 'Commerce', 'Interest1': 'Finance', 'Interest2': 'Economics'},
    {'Stream': 'Arts', 'Interest1': 'History', 'Interest2': 'Political Science'},
    {'Stream': 'Science', 'Interest1': 'Medical', 'Interest2': 'Law'},
    {'Stream': 'Commerce', 'Interest1': 'Accounting', 'Interest2': 'Business'},
    {'Stream': 'Science', 'Interest1': 'Engineering', 'Interest2': 'Medical'},
    {'Stream': 'Arts', 'Interest1': 'Literature', 'Interest2': 'Philosophy'},
    {'Stream': 'Commerce', 'Interest1': 'Marketing', 'Interest2': 'Finance'},
    {'Stream': 'Science', 'Interest1': 'Physics', 'Interest2': 'Engineering'},
    {'Stream': 'Arts', 'Interest1': 'Sociology', 'Interest2': 'Psychology'}
]

# Encode and predict for each sample input
predicted_outputs = []

for sample_input in sample_inputs:
    try:
        # Encode the sample input
        encoded_input = [label_encoders[col].transform(
            [sample_input[col]])[0] for col in input_columns]
        encoded_input = np.array(encoded_input).reshape(
            1, -1)  # Reshape to match model input shape

        # Predict using the trained model
        predictions = model.predict(encoded_input)

        # Decode predictions to human-readable form
        decoded_predictions = {}
        output_index = 0
        for idx, col in enumerate(target_columns):
            # Argmax to find predicted class
            col_pred = np.argmax(
                predictions[0][output_index:output_index + output_shapes[idx]])
            decoded_predictions[col] = label_encoders[col].inverse_transform([col_pred])[
                0]
            output_index += output_shapes[idx]

        predicted_outputs.append({
            'Input': sample_input,
            'Output': decoded_predictions
        })
    except KeyError as e:
        print(f"KeyError: {e} - Ensure the sample input values are valid and match the training data.")
    except ValueError as e:
        print(f"ValueError: {e} - Ensure the sample input values are correctly encoded and match the training data.")

# Print all predicted outputs
for result in predicted_outputs:
    print(f"Sample Input: {result['Input']}")
    print(f"Predicted Output: {result['Output']}")
    print('---')

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


2024-10-09 11:18:55.361486: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-10-09 11:18:55.451685: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-10-09 11:18:55.478693: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-10-09 11:18:55.615186: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  super().__init__(activity_regularizer=activity_regu

Epoch 1/100


I0000 00:00:1728452944.928404   25946 service.cc:146] XLA service 0xc01fda0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1728452944.928441   25946 service.cc:154]   StreamExecutor device (0): NVIDIA GeForce GTX 1650, Compute Capability 7.5
2024-10-09 11:19:04.964769: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-10-09 11:19:05.158970: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:531] Loaded cuDNN version 8907


[1m  45/2000[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m7s[0m 4ms/step - accuracy: 0.0259 - loss: 10.5598      

I0000 00:00:1728452947.453459   25946 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m2000/2000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 5ms/step - accuracy: 0.1487 - loss: 3.0404 - val_accuracy: 0.2274 - val_loss: 1.5152
Epoch 2/100
[1m2000/2000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 15ms/step - accuracy: 0.2309 - loss: 1.5323 - val_accuracy: 0.2289 - val_loss: 1.5019
Epoch 3/100
[1m2000/2000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.2304 - loss: 1.4977

In [9]:
# Print valid labels for input columns
for col in input_columns:
    print(f"Valid labels for '{col}': {label_encoders[col].classes_}")

Valid labels for 'Suggested_Field': ['Accounting' 'Ancient History' 'Audit' 'Chemical Engineering'
 'Civil Engineering' 'Computer Engineering' 'Corporate Finance'
 'Corporate Law' 'Criminal Law' 'Cultural History' 'Dentistry'
 'Development Economics' 'Electrical Engineering' 'Family Law'
 'Financial Analysis' 'Financial Planning' 'Government Administration'
 'Human Rights Law' 'International Economics' 'International Relations'
 'Investment Banking' 'MBBS' 'Macroeconomics' 'Mechanical Engineering'
 'Microeconomics' 'Modern History' 'Museum Studies' 'Nursing' 'Pharmacy'
 'Political Analysis' 'Public Health' 'Public Policy' 'Risk Management'
 'Taxation']
Valid labels for 'Interest1': ['Chartered Accountant' 'Economics' 'Engineering' 'Finance' 'History'
 'Law' 'Medical' 'Political Science']
Valid labels for 'Interest2': ['Chartered Accountant' 'Economics' 'Engineering' 'Finance' 'History'
 'Law' 'Medical' 'Political Science']
Valid labels for 'Stream': ['Arts' 'Commerce' 'Science']


In [11]:
import tensorflow as tf

print("TensorFlow version:", tf.__version__)
print("Is GPU available:", tf.config.list_physical_devices('GPU'))

if tf.config.list_physical_devices('GPU'):
    print("TensorFlow is using GPU")
else:
    print("TensorFlow is not using GPU")

TensorFlow version: 2.17.0
Is GPU available: []
TensorFlow is not using GPU


In [3]:
import tensorflow as tf
print("Is TensorFlow using GPU?", tf.config.list_physical_devices('GPU'))

Is TensorFlow using GPU? []
