In [4]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
import re

In [5]:
# Read the input table from a CSV file located in the working directory.
csv_path = 'random_forest_data.csv'
data = pd.read_csv(csv_path)

In [6]:
def convert_currency(value):
    """Parse a currency-formatted value into a float.

    Args:
        value: Text such as ``'$6,306'`` or ``'($1,200)'``, or a value that is
            already numeric.

    Returns:
        float: The numeric amount. Strings have every character other than
        digits and ``.`` removed before conversion; a leading minus sign or
        surrounding parentheses make the result negative. Non-string input is
        passed straight to ``float()``, so NaN stays NaN.

    Raises:
        ValueError: If a string contains no parsable number.
        TypeError: If a non-string value cannot be converted by ``float()``
            (for example ``None``).
    """
    # Non-string input (int, float, NaN from a missing cell, or a value already
    # converted by an earlier run of the cell) has no formatting to strip, and
    # re.sub would raise TypeError on it.
    if not isinstance(value, str):
        return float(value)

    text = value.strip()
    # Accounting-style "(1,234)" and a minus sign ahead of the first digit both
    # denote a negative amount; stripping symbols alone would lose the sign.
    is_negative = (text.startswith('(') and text.endswith(')')) or bool(re.match(r'[^\d.]*-', text))

    digits = re.sub(r'[^\d.]', '', text)  # Remove non-numeric characters
    amount = float(digits)
    return -amount if is_negative else amount

# Columns that are run through convert_currency so they end up as floats.
currency_columns = [
    'Productivity and Business Processes',
    'Intelligent Cloud',
    'More Personal Computing',
    'Revenue',
]

for column in currency_columns:
    if pd.api.types.is_numeric_dtype(data[column]):
        # Already numeric (e.g. this cell was re-run on the converted frame):
        # only normalise to float, so the text parser is never handed numbers
        # and the cell stays safe to execute more than once.
        data[column] = data[column].astype(float)
    else:
        data[column] = data[column].apply(convert_currency)

data

Unnamed: 0,Year,Productivity and Business Processes,Intelligent Cloud,More Personal Computing,Revenue
0,Q1-16,6306.0,5892.0,9462.0,20379.0
1,Q2-16,6690.0,6343.0,12473.0,23796.0
2,Q3-16,6521.0,6096.0,9539.0,20531.0
3,Q4-16,6970.0,6711.0,8960.0,20614.0
4,Q1-17,6658.0,6382.0,9294.0,20453.0
5,Q2-17,7382.0,6861.0,11823.0,24090.0
6,Q3-17,7958.0,6763.0,8836.0,22090.0
7,Q4-17,8446.0,7434.0,8820.0,23317.0
8,Q1-18,8238.0,6922.0,9378.0,24538.0
9,Q2-18,8953.0,7795.0,12170.0,28918.0


In [7]:
# Model inputs are the three segment columns; the value to predict is 'Revenue'.
segment_columns = ['Productivity and Business Processes', 'Intelligent Cloud', 'More Personal Computing']
features = data[segment_columns]
target = data['Revenue']

# Ordered 80/20 split: the leading rows are used for training and the
# trailing rows are held out for testing (no shuffling).
train_size = int(len(data) * 0.8)
train_features, test_features = features.iloc[:train_size], features.iloc[train_size:]
train_target, test_target = target.iloc[:train_size], target.iloc[train_size:]

In [8]:
# Fit a seeded 100-tree random forest on the training rows.
model = RandomForestRegressor(random_state=42, n_estimators=100)
model.fit(train_features, train_target)

# Predictions for the held-out rows.
predictions = model.predict(test_features)

# Pair every input column with its importance score, highest score first.
feature_importance = model.feature_importances_
feature_importance_df = pd.DataFrame(
    {'Feature': features.columns, 'Importance': feature_importance}
).sort_values(by='Importance', ascending=False)
print("Feature Importance:")
print(feature_importance_df)

# After sorting, the first row holds the feature with the largest importance.
best_segment = feature_importance_df['Feature'].iloc[0]
print("Best Segment:", best_segment)

Feature Importance:
                               Feature  Importance
0  Productivity and Business Processes    0.499098
1                    Intelligent Cloud    0.428116
2              More Personal Computing    0.072786
Best Segment: Productivity and Business Processes
