In [113]:
import numpy as np
import pandas as pd
import openpyxl

import plotly.express as px

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import tensorflow as tf

In [114]:
# Load the "Largest Companies in the World" Excel workbook into a DataFrame.
# NOTE(review): the path is an absolute location under /content, so the file
# must be present there before this cell is run.
data = pd.read_excel('/content/Largest Companies in the World.xlsx')

In [115]:
# Show the loaded table as this cell's output.
data

Unnamed: 0,Global Rank,Company,Sales ($billion),Profits ($billion),Assets ($billion),Market Value ($billion),Country,Continent,Latitude,Longitude
0,1.0,ICBC,134.8,37.8,2813.5,237.3,China,Asia,35.861660,104.195397
1,2.0,China Construction Bank,113.1,30.6,2241.0,202.0,China,Asia,35.861660,104.195397
2,3.0,JPMorgan Chase,108.2,21.3,2359.1,191.4,USA,North America,37.090240,-95.712891
3,4.0,General Electric,147.4,13.6,685.3,243.7,USA,North America,37.090240,-95.712891
4,5.0,Exxon Mobil,420.7,44.9,333.8,400.4,USA,North America,37.090240,-95.712891
...,...,...,...,...,...,...,...,...,...,...
1919,1995.0,Tractor Supply,4.7,0.3,1.7,7.1,USA,North America,37.090240,-95.712891
1920,1996.0,San-Ai Oil,0.5,0.1,25.7,0.5,Japan,Asia,36.204824,138.252924
1921,1996.0,UOL Group,0.9,0.7,7.8,4.2,Singapore,Asia,1.352083,103.819836
1922,1998.0,Interconexion Electrica,2.4,0.2,14.6,5.8,Colombia,South America,4.570868,-74.297333


In [116]:
# Summarize the frame: column names, non-null counts and dtypes.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1924 entries, 0 to 1923
Data columns (total 10 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Global Rank              1924 non-null   float64
 1   Company                  1924 non-null   object 
 2   Sales ($billion)         1924 non-null   float64
 3   Profits ($billion)       1924 non-null   float64
 4   Assets ($billion)        1924 non-null   float64
 5   Market Value ($billion)  1924 non-null   float64
 6   Country                  1924 non-null   object 
 7   Continent                1924 non-null   object 
 8   Latitude                 1924 non-null   float64
 9   Longitude                1924 non-null   float64
dtypes: float64(7), object(3)
memory usage: 150.4+ KB


In [117]:
def preprocess_inputs(df, target='Market Value ($billion)', train_size=0.7, random_state=1):
    """Turn the raw company table into scaled train/test sets.

    The function drops the identifier columns, one-hot encodes the nominal
    columns, separates the target from the features, performs a shuffled
    train/test split and finally standardizes the features using statistics
    computed on the training portion only (so no test information leaks
    into the scaler).

    Parameters
    ----------
    df : pandas.DataFrame
        Raw table. Must contain the columns 'Global Rank', 'Company',
        'Country', 'Continent' and ``target``. The caller's frame is not
        modified.
    target : str, default 'Market Value ($billion)'
        Name of the column to predict.
    train_size : float or int, default 0.7
        Forwarded unchanged to ``train_test_split``.
    random_state : int, default 1
        Seed for the shuffled split, forwarded to ``train_test_split``.

    Returns
    -------
    X_train, X_test : pandas.DataFrame
        Standardized feature frames that keep the original row index and
        column names.
    y_train, y_test : pandas.Series
        Unscaled target values aligned with the feature frames.
    """
    # Drop unused columns. `drop` returns a new frame, so the caller's
    # DataFrame is left untouched without needing an explicit copy.
    df = df.drop(['Global Rank', 'Company'], axis=1)

    # One-hot encode nominal feature columns. With `columns=` the remaining
    # columns keep their order and the indicator columns are appended,
    # prefixed with the source column name (e.g. 'Country_USA').
    df = pd.get_dummies(df, columns=['Country', 'Continent'])

    # Split df into X and y
    y = df[target]
    X = df.drop(target, axis=1)

    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=train_size, shuffle=True, random_state=random_state
    )

    # Scale X: fit on the training rows only, then apply the same transform
    # to both partitions. The scaler returns plain arrays, so wrap them back
    # into DataFrames to preserve index and column labels.
    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train = pd.DataFrame(scaler.transform(X_train), index=X_train.index, columns=X_train.columns)
    X_test = pd.DataFrame(scaler.transform(X_test), index=X_test.index, columns=X_test.columns)

    return X_train, X_test, y_train, y_test

In [118]:
# Build the scaled train/test features and their targets from the raw table.
X_train, X_test, y_train, y_test = preprocess_inputs(data)

In [119]:
# Inspect the standardized training features returned by preprocess_inputs.
X_train

Unnamed: 0,Sales ($billion),Profits ($billion),Assets ($billion),Latitude,Longitude,Country_Australia,Country_Belgium,Country_Bermuda,Country_Brazil,Country_Canada,Country_Channel Islands,Country_Chile,Country_China,Country_Colombia,Country_Czech Republic,Country_Denmark,Country_Egypt,Country_Finland,Country_France,Country_Germany,Country_Greece,Country_Hong Kong,Country_Hungary,Country_India,Country_Indonesia,Country_Ireland,Country_Israel,Country_Italy,Country_Japan,Country_Jordan,Country_Kazakhstan,Country_Kuwait,Country_Lebanon,Country_Liberia,Country_Luxembourg,Country_Malaysia,Country_Mexico,Country_Morocco,Country_New Zealand,Country_Nigeria,Country_Norway,Country_Oman,Country_Pakistan,Country_Panama,Country_Peru,Country_Philippines,Country_Poland,Country_Portugal,Country_Qatar,Country_Russia,Country_Saudi Arabia,Country_Singapore,Country_South Africa,Country_South Korea,Country_Spain,Country_Sweden,Country_Taiwan,Country_Thailand,Country_The Netherlands,Country_Turkey,Country_UK,Country_USA,Country_United Arab Emirates,Country_Venezuela,Country_Vietnam,Continent_Africa,Continent_Asia,Continent_Europe,Continent_North America,Continent_Oceania,Continent_South America
900,-0.430684,0.264572,-0.276882,0.056200,0.942677,-0.16098,-0.077324,-0.066915,-0.106159,-0.185981,-0.027267,-0.054595,3.649541,-0.061062,-0.027267,-0.072303,-0.027267,-0.082046,-0.192302,-0.156055,-0.077324,-0.153539,-0.038576,-0.165774,-0.061062,-0.086516,-0.072303,-0.134738,-0.38531,-0.027267,-0.038576,-0.054595,-0.027267,-0.027267,-0.054595,-0.098754,-0.102521,-0.038576,0.0,-0.027267,-0.082046,-0.027267,0.0,0.0,-0.027267,-0.066915,-0.077324,0.0,-0.072303,-0.128904,-0.094845,-0.102521,-0.109682,-0.185981,-0.128904,-0.119658,-0.153539,-0.094845,-0.1131,-0.090773,-0.228877,-0.613415,-0.072303,0.0,-0.027267,-0.125893,1.296640,-0.571059,-0.688666,-0.16098,-0.137568
1097,-0.007220,-0.887326,-0.197710,0.626463,-0.160959,-0.16098,-0.077324,-0.066915,-0.106159,-0.185981,-0.027267,-0.054595,-0.274007,-0.061062,-0.027267,-0.072303,-0.027267,-0.082046,5.200160,-0.156055,-0.077324,-0.153539,-0.038576,-0.165774,-0.061062,-0.086516,-0.072303,-0.134738,-0.38531,-0.027267,-0.038576,-0.054595,-0.027267,-0.027267,-0.054595,-0.098754,-0.102521,-0.038576,0.0,-0.027267,-0.082046,-0.027267,0.0,0.0,-0.027267,-0.066915,-0.077324,0.0,-0.072303,-0.128904,-0.094845,-0.102521,-0.109682,-0.185981,-0.128904,-0.119658,-0.153539,-0.094845,-0.1131,-0.090773,-0.228877,-0.613415,-0.072303,0.0,-0.027267,-0.125893,-0.771224,1.751133,-0.688666,-0.16098,-0.137568
1639,-0.499878,-0.237537,-0.292357,0.123788,-1.220713,-0.16098,-0.077324,-0.066915,-0.106159,-0.185981,-0.027267,-0.054595,-0.274007,-0.061062,-0.027267,-0.072303,-0.027267,-0.082046,-0.192302,-0.156055,-0.077324,-0.153539,-0.038576,-0.165774,-0.061062,-0.086516,-0.072303,-0.134738,-0.38531,-0.027267,-0.038576,-0.054595,-0.027267,-0.027267,-0.054595,-0.098754,-0.102521,-0.038576,0.0,-0.027267,-0.082046,-0.027267,0.0,0.0,-0.027267,-0.066915,-0.077324,0.0,-0.072303,-0.128904,-0.094845,-0.102521,-0.109682,-0.185981,-0.128904,-0.119658,-0.153539,-0.094845,-0.1131,-0.090773,-0.228877,1.630217,-0.072303,0.0,-0.027267,-0.125893,-0.771224,-0.571059,1.452083,-0.16098,-0.137568
198,1.038987,0.028285,1.726550,1.129855,-0.222100,-0.16098,-0.077324,-0.066915,-0.106159,-0.185981,-0.027267,-0.054595,-0.274007,-0.061062,-0.027267,-0.072303,-0.027267,-0.082046,-0.192302,-0.156055,-0.077324,-0.153539,-0.038576,-0.165774,-0.061062,-0.086516,-0.072303,-0.134738,-0.38531,-0.027267,-0.038576,-0.054595,-0.027267,-0.027267,-0.054595,-0.098754,-0.102521,-0.038576,0.0,-0.027267,-0.082046,-0.027267,0.0,0.0,-0.027267,-0.066915,-0.077324,0.0,-0.072303,-0.128904,-0.094845,-0.102521,-0.109682,-0.185981,-0.128904,-0.119658,-0.153539,-0.094845,-0.1131,-0.090773,4.369159,-0.613415,-0.072303,0.0,-0.027267,-0.125893,-0.771224,1.751133,-0.688666,-0.16098,-0.137568
460,-0.311671,0.205500,-0.182955,1.489972,0.093728,-0.16098,-0.077324,-0.066915,-0.106159,-0.185981,-0.027267,-0.054595,-0.274007,-0.061062,-0.027267,-0.072303,-0.027267,12.188337,-0.192302,-0.156055,-0.077324,-0.153539,-0.038576,-0.165774,-0.061062,-0.086516,-0.072303,-0.134738,-0.38531,-0.027267,-0.038576,-0.054595,-0.027267,-0.027267,-0.054595,-0.098754,-0.102521,-0.038576,0.0,-0.027267,-0.082046,-0.027267,0.0,0.0,-0.027267,-0.066915,-0.077324,0.0,-0.072303,-0.128904,-0.094845,-0.102521,-0.109682,-0.185981,-0.128904,-0.119658,-0.153539,-0.094845,-0.1131,-0.090773,-0.228877,-0.613415,-0.072303,0.0,-0.027267,-0.125893,-0.771224,1.751133,-0.688666,-0.16098,-0.137568
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
905,-0.045968,-0.237537,-0.250971,0.626463,-0.160959,-0.16098,-0.077324,-0.066915,-0.106159,-0.185981,-0.027267,-0.054595,-0.274007,-0.061062,-0.027267,-0.072303,-0.027267,-0.082046,5.200160,-0.156055,-0.077324,-0.153539,-0.038576,-0.165774,-0.061062,-0.086516,-0.072303,-0.134738,-0.38531,-0.027267,-0.038576,-0.054595,-0.027267,-0.027267,-0.054595,-0.098754,-0.102521,-0.038576,0.0,-0.027267,-0.082046,-0.027267,0.0,0.0,-0.027267,-0.066915,-0.077324,0.0,-0.072303,-0.128904,-0.094845,-0.102521,-0.109682,-0.185981,-0.128904,-0.119658,-0.153539,-0.094845,-0.1131,-0.090773,-0.228877,-0.613415,-0.072303,0.0,-0.027267,-0.125893,-0.771224,1.751133,-0.688666,-0.16098,-0.137568
1791,-0.502645,-0.355681,-0.179716,0.123788,-1.220713,-0.16098,-0.077324,-0.066915,-0.106159,-0.185981,-0.027267,-0.054595,-0.274007,-0.061062,-0.027267,-0.072303,-0.027267,-0.082046,-0.192302,-0.156055,-0.077324,-0.153539,-0.038576,-0.165774,-0.061062,-0.086516,-0.072303,-0.134738,-0.38531,-0.027267,-0.038576,-0.054595,-0.027267,-0.027267,-0.054595,-0.098754,-0.102521,-0.038576,0.0,-0.027267,-0.082046,-0.027267,0.0,0.0,-0.027267,-0.066915,-0.077324,0.0,-0.072303,-0.128904,-0.094845,-0.102521,-0.109682,-0.185981,-0.128904,-0.119658,-0.153539,-0.094845,-0.1131,-0.090773,-0.228877,1.630217,-0.072303,0.0,-0.027267,-0.125893,-0.771224,-0.571059,1.452083,-0.16098,-0.137568
1096,-0.372561,-0.208002,-0.266086,0.123788,-1.220713,-0.16098,-0.077324,-0.066915,-0.106159,-0.185981,-0.027267,-0.054595,-0.274007,-0.061062,-0.027267,-0.072303,-0.027267,-0.082046,-0.192302,-0.156055,-0.077324,-0.153539,-0.038576,-0.165774,-0.061062,-0.086516,-0.072303,-0.134738,-0.38531,-0.027267,-0.038576,-0.054595,-0.027267,-0.027267,-0.054595,-0.098754,-0.102521,-0.038576,0.0,-0.027267,-0.082046,-0.027267,0.0,0.0,-0.027267,-0.066915,-0.077324,0.0,-0.072303,-0.128904,-0.094845,-0.102521,-0.109682,-0.185981,-0.128904,-0.119658,-0.153539,-0.094845,-0.1131,-0.090773,-0.228877,1.630217,-0.072303,0.0,-0.027267,-0.125893,-0.771224,-0.571059,1.452083,-0.16098,-0.137568
235,0.679180,0.175964,-0.184754,0.123788,-1.220713,-0.16098,-0.077324,-0.066915,-0.106159,-0.185981,-0.027267,-0.054595,-0.274007,-0.061062,-0.027267,-0.072303,-0.027267,-0.082046,-0.192302,-0.156055,-0.077324,-0.153539,-0.038576,-0.165774,-0.061062,-0.086516,-0.072303,-0.134738,-0.38531,-0.027267,-0.038576,-0.054595,-0.027267,-0.027267,-0.054595,-0.098754,-0.102521,-0.038576,0.0,-0.027267,-0.082046,-0.027267,0.0,0.0,-0.027267,-0.066915,-0.077324,0.0,-0.072303,-0.128904,-0.094845,-0.102521,-0.109682,-0.185981,-0.128904,-0.119658,-0.153539,-0.094845,-0.1131,-0.090773,-0.228877,1.630217,-0.072303,0.0,-0.027267,-0.125893,-0.771224,-0.571059,1.452083,-0.16098,-0.137568


In [120]:
# Inspect the training targets (left unscaled by preprocess_inputs).
y_train

900     29.6
1097     3.5
1639     8.8
198     15.5
460     17.6
        ... 
905      6.4
1791     0.7
1096    10.4
235     34.3
1061    10.4
Name: Market Value ($billion), Length: 1346, dtype: float64

In [121]:
# Training: build and fit a feed-forward regression network on the scaled features

In [122]:
# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialization and batch shuffling are repeatable from run to run.
tf.keras.utils.set_random_seed(1)

# Size the input layer from the data instead of hard-coding it: the number of
# one-hot columns depends on which countries/continents occur in the dataset.
inputs = tf.keras.Input(shape=(X_train.shape[1],))
x = tf.keras.layers.Dense(128, activation='relu')(inputs)
x = tf.keras.layers.Dense(128, activation='relu')(x)
# Single linear unit: the network regresses one continuous value per row.
outputs = tf.keras.layers.Dense(1, activation='linear')(x)

model = tf.keras.Model(inputs=inputs, outputs=outputs)

model.compile(
    optimizer='adam',
    loss='mse'
)

# Hold out 20% of the training rows for validation and stop once val_loss has
# not improved for 3 consecutive epochs, rolling back to the best weights seen.
history = model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    batch_size=32,
    epochs=100,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=3,
            restore_best_weights=True
        )
    ]
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100


In [123]:
# Results: evaluate the trained model on the held-out test set

In [125]:
# Predict on the held-out features and flatten the predictions with
# np.squeeze so they line up element-wise with y_test.
y_pred = np.squeeze(model.predict(X_test))

# Error terms shared by both metrics.
residuals = y_test - y_pred
ss_res = np.sum(residuals**2)                    # unexplained variation
ss_tot = np.sum((y_test - y_test.mean())**2)     # total variation around the mean

# Root-mean-squared error and coefficient of determination.
rmse = np.sqrt(np.mean(residuals**2))
r2 = 1 - (ss_res / ss_tot)

print("RMSE: {:.2f}".format(rmse))
print(" R^2: {:.4f}".format(r2))



RMSE: 17.33
 R^2: 0.7292
