# **IP2_Run3**

In [None]:
!pip install openai
!pip install python-dotenv
!pip3 install pyomo
!apt install glpk-utils
!pip install glpk

Collecting openai
  Downloading openai-1.33.0-py3-none-any.whl (325 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m325.5/325.5 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
Collecting httpx<1,>=0.23.0 (from openai)
  Downloading httpx-0.27.0-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai)
  Downloading httpcore-1.0.5-py3-none-any.whl (77 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.9/77.9 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->openai)
  Downloading h11-0.14.0-py3-none-any.whl (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.3/58.3 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: h11, httpcore, httpx, openai
Successfully installed h11-0.14.0 httpcore-1.0.5 ht

In [None]:

import openai
import os
from IPython.display import Markdown


### **Accessing the GPT4 API**

In [None]:
import os
from dotenv import load_dotenv, find_dotenv

# Locate 'api_file.env' with find_dotenv and load its entries via load_dotenv.
_ = load_dotenv(find_dotenv('api_file.env'))

# Fail early with an actionable message when the key is missing or empty,
# instead of a bare KeyError here or an authentication error on the first API call.
my_api_key = os.environ.get('api_key_env')
if not my_api_key:
    raise KeyError("api_key_env is not set; define it in api_file.env or in the environment")
openai.api_key = my_api_key


### **Generate Mathematical Model**

In [None]:
# Natural-language problem statement. It is sent as the user message of the
# variables, objective and constraints requests below, so its exact wording is
# part of the experiment.
problem = """Your goal is to invest in several of 10 possible investment strategies in the most optimal way. The historic returns of those strategies are stored in the file "investments_data.csv". Each column represents one strategy and the rows are the past investment outcomes. There is no index and the values are separated by a ;.

The costs for investing in a given investment is stored in a vector A, which has one value for each strategy in order.
The values are: [80, 340, 410, 50, 180, 221, 15, 348, 191, 225]

You can only invest once into an investment.

Unfortunately due to other costs and inflation, your available budget at this time is uncertain. There are four possible budget scenarios with different probabilities: scenario 1 with 1000 euros and probability of 0.55, scenario 2  with 1100 euros and probability of 0.4, scenario 3 with 900 euros and probability of 0.04, scenario 4 with 1200 euros and probability of 0.01.
The tolerable probability of exceeding the budget is 0.4.

Please formulate a mean-variance mathematical model for this optimization problem, considering the past performance of investment strategies and the uncertain budget. You can take 2 as the risk parameter r."""


In [None]:

# API client, authenticated with the key stored under 'api_key_env'.
client = openai.OpenAI(api_key=os.environ['api_key_env'])

# Step 1: request only the variables; the problem statement is the user turn.
response1 = client.chat.completions.create(
    model="gpt-4",
    seed=3,
    messages=[
        dict(role="system", content="Please formulate only the variables for this mathematical optimization problem."),
        dict(role="user", content=problem),
    ],
)


In [None]:
# Show the text of the first returned choice for the variables request.
print(response1.choices[0].message.content)

Variables:

1. X = Binary decision variable matrix of size n x m where n is the number of investment strategies (n=10) and m is the number of budget scenarios (m=4). If the entry X[i][j] is 1, investment i is chosen in scenario j else it is not chosen.

2. M = Matrix of size n x k where n is the number of investment strategies (n=10) and k is the number of past investment outcomes. Each column represents one strategy and the rows are the past investment outcomes. This is derived from "investments_data.csv".

3. A = Vector of size n (n=10) representing the cost of each investment strategy. The values are [80, 340, 410, 50, 180, 221, 15, 348, 191, 225].

4. P = Vector of size m (m=4) representing the probability of each budget scenario. The values are: [0.55, 0.4, 0.04, 0.01].

5. B = Vector of size m (m=4) representing the budget in each scenario. The values are: [1000, 1100, 900, 1200].

6. r = The risk parameter value (r=2).

7. T = Tolerable probability of exceeding the budget (T=0.4

### **Generate Objective, Constraints and Pyomo Code**

In [None]:

# Step 2: request only the objective function. The user turn is the problem
# statement immediately followed by the variables answer from step 1.
response2 = client.chat.completions.create(
    model="gpt-4",
    seed=3,
    messages=[
        dict(role="system", content="Please formulate only the objective function for this mathematical optimization problem."),
        dict(role="user", content=problem + response1.choices[0].message.content),
    ],
)

In [None]:
# Show the text of the first returned choice for the objective-function request.
print(response2.choices[0].message.content)

Objective Function:

minimize X

Σ { P[j] * Σ { X[i][j] * (r * Var(M[i]) - Mean(M[i])) } } over i={1..10} and j={1..4}

Subject to: 
- Σ { X[i][j] * A[i] } <= B[j] for all budget scenario j={1..4}
- Σ { P[j] * (Σ { X[i][j]*A[i] }) > B[j] } <= T 
   
This formulation aims to minimize the weighted risk-adjusted return of the investment portfolio. The constraints represent the budget limit for each scenario and the tolerability of exceeding the budget. The weights P[j] are the probabilities of the budget scenarios, Var(M[i]) and Mean(M[i]) are the variance and mean of the past returns of investment i, and X[i][j] * A[i] stands for the cost of investment i if chosen in budget scenario j.


In [None]:
# Step 3: request only the constraints. The user turn is the problem statement
# followed directly by the answers of steps 1 and 2 (no separators are inserted).
response3 = client.chat.completions.create(
    model="gpt-4",
    seed=3,
    messages=[
        dict(role="system", content="Please formulate only the constraints for this mathematical optimization problem."),
        dict(
            role="user",
            content=problem + "".join(
                earlier.choices[0].message.content for earlier in (response1, response2)
            ),
        ),
    ],
)

In [None]:
# Show the text of the first returned choice for the constraints request.
print(response3.choices[0].message.content)

1. For each budget scenario j (j=1,2,3,4), the total cost of chosen investments should not exceed the budget:
   Σ (X[i][j] * A[i]) ≤ B[j]   for all j=1..4

2. The probability of exceeding the budget should not exceed the tolerable limit:
   Σ (P[j] * I( Σ (X[i][j] * A[i]) > B[j] )) ≤ T
   Here I() is an indicator function that equals 1 if Σ (X[i][j]*A[i]) > B[j] and 0 otherwise.

3. You can only invest once into an investment:
   Σ (Σ (X[i][j]) across all j) ≤ 1   for all i=1..10

Note: The constraint 3 ensures that each investment is selected at most once across all budget scenarios. If the investment can be repeated across budget scenarios, we can drop this constraint.


In [None]:
# Step 4: request Pyomo code. The user turn is the concatenation of the three
# earlier answers only; the original problem statement is not included here.
response4 = client.chat.completions.create(
    model="gpt-4",
    seed=3,
    messages=[
        dict(role="system", content="Please write a python pyomo code for this optimization problem. Use sample data where needed. Indicate where you use sample data."),
        dict(
            role="user",
            content="".join(
                earlier.choices[0].message.content
                for earlier in (response1, response2, response3)
            ),
        ),
    ],
)

In [None]:
# Wrap the code-generation answer in a Markdown object; as the cell's last
# expression it becomes the cell output.
Markdown(response4.choices[0].message.content)

Here's an abstract python code using `pyomo` that sets up the above described problem. 

Here, we would use sample data for our matrix 'M' as the 'investments_data.csv' has not been provided. There's also an assumption that the matrix is of dimensions 10x10, i.e., there are 10 past investment outcomes for each of the 10 strategies.

```python
from numpy import mean, var
from pyomo.environ import *

model = ConcreteModel()

# Set dimensions
n = 10
m = 4
k = 10

# Matrix for past investment outcomes for each strategy
M = [[5, 7, 9, 11, 4, 6, 8, 10, 3, 14]*10 for _ in range(n)]

# Other sample data as described in the variables section
A_values = [80, 340, 410, 50, 180, 221, 15, 348, 191, 225]
P_values = [0.55, 0.4, 0.04, 0.01]
B_values = [1000, 1100, 900, 1200]
r_value = 2
T_value = 0.4

# Define model variables and sets
model.investment_strategy = RangeSet(n)
model.budget_scenario = RangeSet(m)
model.X = Var(model.investment_strategy, model.budget_scenario, domain=Binary)
model.A = Param(model.investment_strategy, initialize=lambda model, i: A_values[i-1])
model.P = Param(model.budget_scenario, initialize=lambda model, j: P_values[j-1])
model.B = Param(model.budget_scenario, initialize=lambda model, j: B_values[j-1])

# Objective function
model.obj = Objective(expr=sum(model.P[j]*sum(model.X[i,j]*(r_value*var(M[i-1]) - mean(M[i-1])) for i in model.investment_strategy) for j in model.budget_scenario), sense=minimize)

# Constraints
model.budget_limit = ConstraintList()
for j in model.budget_scenario:
    model.budget_limit.add(sum(model.X[i, j]*model.A[i] for i in model.investment_strategy) <= model.B[j])
    
model.budget_exceed_prob = Constraint(expr=sum(model.P[j]*sum(model.X[i, j]*model.A[i] for i in model.investment_strategy) for j in model.budget_scenario) <= T_value*B_values[-1])
model.single_investment = ConstraintList()
for i in model.investment_strategy:
    model.single_investment.add(sum(model.X[i, j] for j in model.budget_scenario) <= 1)

# Solving the model
solver = SolverFactory('glpk')
solver.solve(model)
```

This code sets up the problem and attempts to solve it, but it is important to note that this problem may not have a feasible or optimal solution with the given and assumed data, specifically the budget limit and the tolerable probability of exceeding the budget. If so, you may have to adjust these parameters or the investment strategies themselves. For real data, you would update the code to read the appropriate csv file into the 'M' matrix and to handle any other requirements of your specific problem.

### **Run the code Generated by GPT4**

In [None]:
# Run of the Pyomo program shown in the generated answer above, kept unchanged.
# The return-history matrix M is placeholder sample data, not investments_data.csv.
from numpy import mean, var
from pyomo.environ import *

model = ConcreteModel()

# Set dimensions
n = 10
m = 4
k = 10

# Matrix for past investment outcomes for each strategy
# NOTE(review): every one of the n rows is the same 10-value pattern repeated
# 10 times (100 entries; k is never used), so all strategies share one mean
# and one variance.
M = [[5, 7, 9, 11, 4, 6, 8, 10, 3, 14]*10 for _ in range(n)]

# Other sample data as described in the variables section
A_values = [80, 340, 410, 50, 180, 221, 15, 348, 191, 225]
P_values = [0.55, 0.4, 0.04, 0.01]
B_values = [1000, 1100, 900, 1200]
r_value = 2
T_value = 0.4

# Define model variables and sets
# RangeSet(n) is 1-based, hence the [i-1] / [j-1] look-ups into the Python lists.
model.investment_strategy = RangeSet(n)
model.budget_scenario = RangeSet(m)
model.X = Var(model.investment_strategy, model.budget_scenario, domain=Binary)
model.A = Param(model.investment_strategy, initialize=lambda model, i: A_values[i-1])
model.P = Param(model.budget_scenario, initialize=lambda model, j: P_values[j-1])
model.B = Param(model.budget_scenario, initialize=lambda model, j: B_values[j-1])

# Objective function
# NOTE(review): for the sample row, r*var - mean = 2*10.41 - 7.7 > 0, so every
# coefficient is positive and selecting nothing (objective 0) is the minimizer.
model.obj = Objective(expr=sum(model.P[j]*sum(model.X[i,j]*(r_value*var(M[i-1]) - mean(M[i-1])) for i in model.investment_strategy) for j in model.budget_scenario), sense=minimize)

# Constraints
# Hard budget limit in every scenario.
model.budget_limit = ConstraintList()
for j in model.budget_scenario:
    model.budget_limit.add(sum(model.X[i, j]*model.A[i] for i in model.investment_strategy) <= model.B[j])

# NOTE(review): this bounds the probability-weighted total cost by
# T_value * B_values[-1]; it is not the indicator-based probability constraint
# written in the constraints answer.
model.budget_exceed_prob = Constraint(expr=sum(model.P[j]*sum(model.X[i, j]*model.A[i] for i in model.investment_strategy) for j in model.budget_scenario) <= T_value*B_values[-1])
# Each strategy may be selected in at most one scenario.
model.single_investment = ConstraintList()
for i in model.investment_strategy:
    model.single_investment.add(sum(model.X[i, j] for j in model.budget_scenario) <= 1)

# Solving the model
# The solve result is the cell's last expression, so the notebook displays it.
solver = SolverFactory('glpk')
solver.solve(model)

{'Problem': [{'Name': 'unknown', 'Lower bound': 0.0, 'Upper bound': 0.0, 'Number of objectives': 1, 'Number of constraints': 15, 'Number of variables': 40, 'Number of nonzeros': 120, 'Sense': 'minimize'}], 'Solver': [{'Status': 'ok', 'Termination condition': 'optimal', 'Statistics': {'Branch and bound': {'Number of bounded subproblems': '1', 'Number of created subproblems': '1'}}, 'Error rc': 0, 'Time': 0.004428863525390625}], 'Solution': [OrderedDict([('number of solutions', 0), ('number of solutions displayed', 0)])]}

In [None]:
# Same generated model as in the previous cell, but with the historic returns
# read from investments_data.csv instead of placeholder sample data.
from numpy import mean, var
from pyomo.environ import *
import pandas as pd
model = ConcreteModel()

# Set dimensions
n = 10
m = 4
k = 10

# Matrix for past investment outcomes for each strategy.
# The CSV holds one strategy per column and one past outcome per row, while the
# objective below looks up M by strategy (M[i-1]). Transpose so that every
# entry of M is the complete return history of a single strategy; without the
# transpose, mean/var would be taken across strategies for one past outcome.
df = pd.read_csv("investments_data.csv", sep=";", header=None)
headers = [str(col + 1) for col in range(len(df.columns))]
df.columns = headers
M = df.T.values.tolist()
assert len(M) == n, f"expected {n} strategy columns in investments_data.csv, found {len(M)}"

# Other sample data as described in the variables section
A_values = [80, 340, 410, 50, 180, 221, 15, 348, 191, 225]
P_values = [0.55, 0.4, 0.04, 0.01]
B_values = [1000, 1100, 900, 1200]
r_value = 2
T_value = 0.4

# Define model variables and sets
# RangeSet(n) is 1-based, hence the [i-1] / [j-1] look-ups into the Python lists.
model.investment_strategy = RangeSet(n)
model.budget_scenario = RangeSet(m)
model.X = Var(model.investment_strategy, model.budget_scenario, domain=Binary)
model.A = Param(model.investment_strategy, initialize=lambda model, i: A_values[i-1])
model.P = Param(model.budget_scenario, initialize=lambda model, j: P_values[j-1])
model.B = Param(model.budget_scenario, initialize=lambda model, j: B_values[j-1])

# Objective function: probability-weighted sum of (r * variance - mean) of the
# selected strategies, minimized.
model.obj = Objective(expr=sum(model.P[j]*sum(model.X[i,j]*(r_value*var(M[i-1]) - mean(M[i-1])) for i in model.investment_strategy) for j in model.budget_scenario), sense=minimize)

# Constraints
# Hard budget limit in every scenario.
model.budget_limit = ConstraintList()
for j in model.budget_scenario:
    model.budget_limit.add(sum(model.X[i, j]*model.A[i] for i in model.investment_strategy) <= model.B[j])

# NOTE(review): kept exactly as generated. It bounds the probability-weighted
# total cost by T_value * B_values[-1] rather than bounding a probability.
model.budget_exceed_prob = Constraint(expr=sum(model.P[j]*sum(model.X[i, j]*model.A[i] for i in model.investment_strategy) for j in model.budget_scenario) <= T_value*B_values[-1])
# Each strategy may be selected in at most one scenario.
model.single_investment = ConstraintList()
for i in model.investment_strategy:
    model.single_investment.add(sum(model.X[i, j] for j in model.budget_scenario) <= 1)

# Solving the model
solver = SolverFactory('glpk')
solver.solve(model)
print(model.obj())

0.0


### **Edit and Run the code for the mathematical model produced by GPT4 (Circumstantial)**

In [None]:
# Hand-edited version of the generated program: the probability-of-exceeding-
# the-budget constraint is modelled with one binary indicator y[j] per scenario
# and big-M constraints, and the returns come from investments_data.csv.
from numpy import mean, var
from pyomo.environ import *
import pandas as pd
model = ConcreteModel()

# Set dimensions
n = 10
m = 4
k = 10

# Matrix for past investment outcomes for each strategy.
# The CSV holds one strategy per column and one past outcome per row, while the
# objective below looks up M by strategy. Transpose so that every entry of M is
# the complete return history of a single strategy.
df = pd.read_csv("investments_data.csv", sep=";", header=None)
headers = [str(col + 1) for col in range(len(df.columns))]
df.columns = headers
M = df.T.values.tolist()
assert len(M) == n, f"expected {n} strategy columns in investments_data.csv, found {len(M)}"

# Other sample data as described in the variables section
A_values = [80, 340, 410, 50, 180, 221, 15, 348, 191, 225]
P_values = [0.55, 0.4, 0.04, 0.01]
B_values = [1000, 1100, 900, 1200]
r_value = 2
T_value = 0.4

# Define model variables and sets
# RangeSet(n) is 1-based, hence the [i-1] / [j-1] look-ups into the Python lists.
model.investment_strategy = RangeSet(n)
model.budget_scenario = RangeSet(m)
model.X = Var(model.investment_strategy, model.budget_scenario, domain=Binary)
model.A = Param(model.investment_strategy, initialize=lambda model, i: A_values[i-1])
model.P = Param(model.budget_scenario, initialize=lambda model, j: P_values[j-1])
model.B = Param(model.budget_scenario, initialize=lambda model, j: B_values[j-1])
# y[j] is the indicator used by the big-M constraints below.
model.y = Var(model.budget_scenario, domain=Binary)

# Objective function: probability-weighted sum of (r * variance - mean) of the
# selected strategies, minimized. M is a 0-based Python list while i comes from
# the 1-based RangeSet, so strategy i lives at M[i-1] (M[i] would skip the
# first strategy and read one past the last).
model.obj = Objective(expr=sum(model.P[j]*sum(model.X[i,j]*(r_value*var(M[i-1]) - mean(M[i-1])) for i in model.investment_strategy) for j in model.budget_scenario), sense=minimize)

# Constraints
# Hard budget limit in every scenario.
model.budget_limit = ConstraintList()
for j in model.budget_scenario:
    model.budget_limit.add(sum(model.X[i, j]*model.A[i] for i in model.investment_strategy) <= model.B[j])

def sum_constraint(model, j):
    """Big-M link: when y[j] == 1 the scenario cost must be at least B[j];
    when y[j] == 0 the bound is relaxed by 1e6 and is inactive."""
    return sum(model.X[i, j] * model.A[i] for i in model.investment_strategy) >= model.B[j] - (1 - model.y[j]) * 1e6
model.SumConstraint = Constraint(model.budget_scenario, rule=sum_constraint)

def sum_constraint2(model, j):
    """Big-M link: when y[j] == 0 the scenario cost must stay within B[j];
    when y[j] == 1 the bound is relaxed by 1e6 and is inactive."""
    return sum(model.X[i, j] * model.A[i] for i in model.investment_strategy) <= model.B[j] + model.y[j] * 1e6
model.SumConstraint2 = Constraint(model.budget_scenario, rule=sum_constraint2)

def indicator_constraint(model):
    """Total probability of the scenarios flagged by y must not exceed T_value."""
    return sum(model.P[j] * model.y[j] for j in model.budget_scenario) <= T_value
model.IndicatorConstraint = Constraint(rule=indicator_constraint)

# Each strategy may be selected in at most one scenario.
model.single_investment = ConstraintList()
for i in model.investment_strategy:
    model.single_investment.add(sum(model.X[i, j] for j in model.budget_scenario) <= 1)

# Solving the model
solver = SolverFactory('glpk')
solver.solve(model)
print(model.obj())

0.0
